permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / unparse_c.ml
1 (* Yoann Padioleau, Julia Lawall
2 *
3 * Copyright (C) 2012, INRIA.
4 * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA.
5 * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU
6 *
7 * This program is free software; you can redistribute it and/or
8 * modify it under the terms of the GNU General Public License (GPL)
9 * version 2 as published by the Free Software Foundation.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * file license.txt for more details.
15 *
16 *
17 * Modifications by Julia Lawall for better newline handling.
18 *)
19 open Common
20
21 module TH = Token_helpers
22
23 (* should keep comments and directives in between adjacent deleted terms,
24 but not comments and directives within deleted terms. should use the
25 labels found in the control-flow graph *)
26
27
28
29 (*****************************************************************************)
30 (* Wrappers *)
31 (*****************************************************************************)
32 let pr2, pr2_once = mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
33
34 (*****************************************************************************)
35 (* Types used during the intermediate phases of the unparsing *)
36 (*****************************************************************************)
37
38 type token1 =
39 | Fake1 of Ast_c.info
40 | T1 of Parser_c.token
41
42 (* The cocci_tag of the token should always be a NOTHING. The mark of
43 * the token can only be OriginTok or ExpandedTok. Why not get rid of
44 * token and get something simpler ? because we need to know if the
45 * info is a TCommentCpp or TCommentSpace, etc for some of the further
46 * analysis so easier to keep with the token.
47 *
48 * This type contains the whole information. Have all the tokens with this
49 * type.
50 *)
51 type min =
52 | Min of (int list (* match numbers from witness trees *) *
53 Ast_cocci.adjacency (* adjacency information *))
54 | Ctx
55
56 type token2 =
57 | T2 of Parser_c.token * min
58 * int option (* orig index, abstracting away comments and space *)
59 * Unparse_cocci.nlhint option
60 | Fake2 of min
61 | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
62 * Unparse_cocci.nlhint option
63 | C2 of string
64 | Comma of string
65 | Indent_cocci2
66 | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
67 | EatSpace2
68
69 (* not used yet *)
70 type token3 =
71 | T3 of Parser_c.token
72 | Cocci3 of string
73 | C3 of string
74
75
76 (* similar to the tech in parsing_hack *)
77 type token_extended =
78 { tok2 : token2;
79 str : string;
80 idx : int option; (* to know if 2 tokens were consecutive in orig file *)
81 mutable new_tokens_before : token2 list;
82 mutable remove : bool;
83 }
84
85
86 (*****************************************************************************)
87 (* Helpers *)
88 (*****************************************************************************)
89
90 let info_of_token1 t =
91 match t with
92 | Fake1 info -> info
93 | T1 tok -> TH.info_of_tok tok
94
95 let print_token1 = function
96 | T1 tok -> TH.str_of_tok tok
97 | Fake1 info -> "fake"
98
99 let str_of_token2 = function
100 | T2 (t,_,_,_) -> TH.str_of_tok t
101 | Cocci2 (s,_,_,_,_)
102 | C2 s
103 | Comma s -> s
104 | Fake2 _
105 | Indent_cocci2
106 | Unindent_cocci2 _
107 | EatSpace2 -> ""
108
109 let print_token2 = function
110 | T2 (t,b,_,_) ->
111 let t_str =
112 match t with
113 | Parser_c.TCommentSpace _ -> " sp "
114 | Parser_c.TCommentNewline _ -> " nl "
115 | Parser_c.TCommentCpp _ -> " cp "
116 | Parser_c.TCommentMisc _ -> " misc "
117 | Parser_c.TComment _ -> " comment "
118 | _ -> "" in
119 let b_str =
120 match b with
121 | Min (index,adj) ->
122 Printf.sprintf "-.%d[%s]"
123 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
124 (String.concat " " (List.map string_of_int index))
125 | Ctx -> "" in
126 "T2:"^b_str^t_str^TH.str_of_tok t
127 | Fake2 b ->
128 let b_str =
129 match b with
130 | Min (index,adj) ->
131 Printf.sprintf "-%d[%s]"
132 (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
133 (String.concat " " (List.map string_of_int index))
134 | Ctx -> "" in
135 b_str^"fake"
136 | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
137 | C2 s -> "C2:"^s
138 | Comma s -> "Comma:"^s
139 | Indent_cocci2 -> "Indent"
140 | Unindent_cocci2 _ -> "Unindent"
141 | EatSpace2 -> "EatSpace"
142
143 (*
144 let simple_print_all_tokens pr l =
145 List.iter (function x -> Printf.printf "|%s| " (pr x)) l;
146 Printf.printf "\n"
147 *)
148
149 let str_of_token3 = function
150 | T3 t -> TH.str_of_tok t
151 | Cocci3 s | C3 s -> s
152
153
154
155 let mk_token_extended x =
156 let origidx =
157 match x with
158 | T2 (_,_,idx,_) -> idx
159 | _ -> None in
160 { tok2 = x;
161 str = str_of_token2 x;
162 idx = origidx;
163 new_tokens_before = [];
164 remove = false;
165 }
166
167 let rebuild_tokens_extented toks_ext =
168 let _tokens = ref [] in
169 toks_ext +> List.iter (fun tok ->
170 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
171 if not tok.remove then push2 tok.tok2 _tokens;
172 );
173 let tokens = List.rev !_tokens in
174 (tokens +> List.map mk_token_extended)
175
176
177 let mcode_contain_plus = function
178 | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
179 | Ast_cocci.CONTEXT _ -> true
180 (* patch: when need full coccinelle transformation *)
181 | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
182 | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*)
183 | Ast_cocci.PLUS _ -> raise (Impossible 132)
184
185 let contain_plus info =
186 let mck = Ast_c.mcode_of_info info in
187 mcode_contain_plus mck
188
189 (*****************************************************************************)
190 (* Last fix on the ast *)
191 (*****************************************************************************)
192
193 (* Because of the ugly trick to handle initialiser, I generate fake ','
194 * for the last initializer element, but if there is nothing around it,
195 * I don't want in the end to print it.
196 *)
197
198 let remove_useless_fakeInfo_struct program =
199 let bigf = { Visitor_c.default_visitor_c_s with
200 Visitor_c.kini_s = (fun (k,bigf) ini ->
201 match k ini with
202 | Ast_c.InitList args, ii ->
203 (match ii with
204 | [_;_] -> ini
205 | i1 :: i2 :: iicommaopt :: tl when
206 (not (contain_plus iicommaopt))
207 && (not (contain_plus i2))
208 && (Ast_c.is_fake iicommaopt) ->
209 (* sometimes the guy put a normal iicommaopt *)
210 Ast_c.InitList args, (i1 :: i2 :: tl)
211 | ii -> Ast_c.InitList args, ii
212 )
213 | x -> x)
214 } in
215 Visitor_c.vk_toplevel_s bigf program
216
217
218 (*****************************************************************************)
219 (* Tokens1 generation *)
220 (*****************************************************************************)
221
222 let get_fakeInfo_and_tokens celem toks =
223
224 let toks_in = ref toks in
225 let toks_out = ref [] in
226
227 (* todo? verify good order of position ? *)
228 let pr_elem info =
229 match Ast_c.pinfo_of_info info with
230 | Ast_c.FakeTok _ ->
231 push2 (Fake1 info) toks_out
232 | Ast_c.OriginTok _ | Ast_c.ExpandedTok _ ->
233
234 (* get the associated comments/space/cppcomment tokens *)
235 let (before, x, after) =
236 !toks_in +> split_when (fun tok ->
237 info =*= TH.info_of_tok tok)
238 in
239 assert(info =*= TH.info_of_tok x);
240 (*old: assert(before +> List.for_all (TH.is_comment)); *)
241 before +> List.iter (fun x ->
242 if not (TH.is_comment x)
243 then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
244 (* case such as int asm d3("x"); not yet in ast *)
245 );
246 before +> List.iter (fun x -> push2 (T1 x) toks_out);
247
248 push2 (T1 x) toks_out;
249 toks_in := after;
250 | Ast_c.AbstractLineTok _ ->
251 (* can be called on type info when for instance use -type_c *)
252 if !Flag_parsing_c.pretty_print_type_info
253 then push2 (Fake1 info) toks_out
254 else raise (Impossible 134) (* at this stage *)
255 in
256
257 let pr_space _ = () in (* use the spacing that is there already *)
258
259 Pretty_print_c.pp_program_gen pr_elem pr_space celem;
260
261 if not (null !toks_in)
262 then failwith "WEIRD: unparsing not finished";
263
264 List.rev !toks_out
265
266 (* Fake nodes that have BEFORE code or are - should be moved over any subsequent
267 whitespace and newlines, but not any comments, to get as close to the affected
268 code as possible. Similarly, fake nodes that have AFTER code should be moved
269 backwards. No fake nodes should have both before and after code. *)
270
271 let displace_fake_nodes toks =
272 let is_fake = function Fake1 _ -> true | _ -> false in
273 let is_whitespace = function
274 | T1(Parser_c.TCommentSpace _)
275 (* patch: cocci *)
276 | T1(Parser_c.TCommentNewline _) -> true
277 | _ -> false in
278 let rec loop toks =
279 let fake_info =
280 try Some (split_when is_fake toks)
281 with Not_found -> None in
282 match fake_info with
283 | Some(bef,((Fake1 info) as fake),aft) ->
284 (match !(info.Ast_c.cocci_tag) with
285 | Some x ->
286 (match x with
287 | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_)
288 (* for , replacement is more likely to be like after, but not clear...
289 but treating it as after breaks a lot of tests. *)
290
291 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
292 (* move the fake node forwards *)
293 let (whitespace,rest) = span is_whitespace aft in
294 bef @ whitespace @ fake :: (loop rest)
295
296 | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
297 (* move the fake node backwards *)
298 let revbef = List.rev bef in
299 let (revwhitespace,revprev) = span is_whitespace revbef in
300 let whitespace = List.rev revwhitespace in
301 let prev = List.rev revprev in
302 prev @ fake :: (loop (whitespace @ aft))
303 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
304 failwith "fake node should not be before-after"
305 | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_)
306 | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
307 )
308 | None ->
309 bef @ fake :: (loop aft)
310 )
311 | None -> toks
312 | _ -> raise (Impossible 135) in
313 loop toks
314
315 (*****************************************************************************)
316 (* Tokens2 generation *)
317 (*****************************************************************************)
318
319 let comment2t2 = function
320 | (Token_c.TCommentCpp
321 (* not sure iif the following list is exhaustive or complete *)
322 (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
323 (info : Token_c.info)) ->
324 C2(info.Common.str)
325 | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
326 C2("\n"^info.Common.str^"\n")
327 | x -> failwith (Printf.sprintf "unexpected comment %s" (Dumper.dump x))
328
329 let expand_mcode toks =
330 let toks_out = ref [] in
331
332 let index = ref 0 in
333
334 let add_elem t minus =
335 match t with
336 | Fake1 info ->
337 let str = Ast_c.str_of_info info in
338 let isminus = match minus with Min _ -> true | Ctx -> false in
339 (* don't add fake string if the thing should be removed *)
340 if str =$= "" or isminus
341 then push2 (Fake2 minus) toks_out
342 (* fx the fake "," at the end of a structure or enum.
343 no idea what other fake info there can be... *)
344 else push2 (Comma str) toks_out
345
346 | T1 tok ->
347 (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
348 (* no tag on expandedTok ! *)
349 let modified = function
350 | None -> false
351 | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
352 | _ -> true in
353 (if TH.is_expanded tok &&
354 modified !((TH.info_of_tok tok).Ast_c.cocci_tag)
355 (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)
356 then
357 failwith
358 (Printf.sprintf
359 "expanded token %s on line %d is either modified or stored in a metavariable"
360 (TH.str_of_tok tok) (TH.line_of_tok tok)));
361
362 let tok' = tok +> TH.visitor_info_of_tok (fun i ->
363 { i with Ast_c.cocci_tag = ref Ast_c.emptyAnnot; }
364 ) in
365
366 let optindex =
367 if TH.is_origin tok && not (TH.is_real_comment tok)
368 then
369 begin
370 incr index;
371 Some !index
372 end
373 else None
374 in
375
376 push2 (T2 (tok', minus, optindex, None)) toks_out
377 in
378
379 let expand_info t =
380 let (mcode,env) =
381 Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).Ast_c.cocci_tag) in
382
383 let pr_cocci s ln col rcol hint =
384 push2 (Cocci2 (s,ln,col,rcol,hint)) toks_out in
385 let pr_c info =
386 (match Ast_c.pinfo_of_info info with
387 | Ast_c.AbstractLineTok _ ->
388 push2 (C2 (Ast_c.str_of_info info)) toks_out
389 | Ast_c.FakeTok (s,_) ->
390 push2 (C2 s) toks_out
391 | _ ->
392 Printf.fprintf stderr "line: %s\n" (Dumper.dump info);
393 failwith "not an abstract line"
394 );
395 (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
396 List.iter (fun x -> push2 (comment2t2 x) toks_out) in
397
398 let pr_barrier ln col = (* marks a position, used around C code *)
399 push2 (Cocci2 ("",ln,col,col,None)) toks_out in
400 let pr_nobarrier ln col = () in (* not needed for linux spacing *)
401
402 let pr_cspace _ = push2 (C2 " ") toks_out in
403
404 let pr_space _ = () (* rely on add_space in cocci code *) in
405 let pr_arity _ = () (* not interested *) in
406
407 let indent _ = push2 Indent_cocci2 toks_out in
408 let unindent x = push2 (Unindent_cocci2 x) toks_out in
409 let eat_space _ = push2 EatSpace2 toks_out in
410
411 let args_pp =
412 (env, pr_cocci, pr_c, pr_cspace,
413 (match !Flag_parsing_c.spacing with
414 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
415 pr_arity,
416 (match !Flag_parsing_c.spacing with
417 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
418 indent, unindent, eat_space) in
419
420 (* old: when for yacfe with partial cocci:
421 * add_elem t false;
422 *)
423
424 (* patch: when need full coccinelle transformation *)
425 let unparser = Unparse_cocci.pp_list_list_any args_pp false in
426 match mcode with
427 | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
428 (* Why adding ? because I want to have all the information, the whole
429 * set of tokens, so I can then process and remove the
430 * is_between_two_minus for instance *)
431 add_elem t (Min (inst,adj));
432 (match any_xxs with
433 | Ast_cocci.NOREPLACEMENT -> ()
434 | Ast_cocci.REPLACEMENT(any_xxs,_) ->
435 unparser any_xxs Unparse_cocci.InPlace
436 )
437 | Ast_cocci.CONTEXT (_,any_befaft) ->
438 (match any_befaft with
439 | Ast_cocci.NOTHING ->
440 add_elem t Ctx
441 | Ast_cocci.BEFORE (xxs,_) ->
442 unparser xxs Unparse_cocci.Before;
443 add_elem t Ctx
444 | Ast_cocci.AFTER (xxs,_) ->
445 add_elem t Ctx;
446 unparser xxs Unparse_cocci.After;
447 | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
448 unparser xxs Unparse_cocci.Before;
449 add_elem t Ctx;
450 unparser yys Unparse_cocci.After;
451 )
452 | Ast_cocci.PLUS _ -> raise (Impossible 136)
453 in
454
455 toks +> List.iter expand_info;
456 List.rev !toks_out
457
458
459 (*****************************************************************************)
460 (* Tokens2 processing, filtering, adjusting *)
461 (*****************************************************************************)
462
463 let is_space = function
464 | T2(Parser_c.TCommentSpace _,_b,_i,_h) -> true (* only whitespace *)
465 | _ -> false
466
467 let is_newline = function
468 | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> true
469 | _ -> false
470
471 let is_whitespace x =
472 is_space x or is_newline x
473
474 let is_minusable_comment = function
475 | (T2 (t,_b,_i,_h)) ->
476 (match t with
477 | Parser_c.TCommentSpace _ (* only whitespace *)
478 (* patch: coccinelle *)
479 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
480 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
481 | Parser_c.TComment _
482 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
483 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
484 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
485 | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> (* result was false *)
486 true
487 (*
488 | Parser_c.TCommentMisc _
489 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
490 false
491 *)
492 | _ -> false
493 )
494 | _ -> false
495
496 let is_minusable_comment_nocpp = function
497 | (T2 (t,_b,_i,_h)) ->
498 (match t with
499 | Parser_c.TCommentSpace _ (* only whitespace *)
500 (* patch: coccinelle *)
501 | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
502 | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
503 | Parser_c.TComment _ -> true
504 (*
505 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
506 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
507 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
508 | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
509 false
510
511 | Parser_c.TCommentMisc _
512 | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
513 false
514 *)
515 | _ -> false
516 )
517 | _ -> false
518
519 let all_coccis = function
520 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
521 | Unindent_cocci2 _ | EatSpace2 -> true
522 | _ -> false
523
524 (* previously gave up if the first character was a newline, but not clear why *)
525 let is_minusable_comment_or_plus x =
526 is_minusable_comment x or all_coccis x
527
528 let set_minus_comment adj = function
529 | T2 (t,Ctx,idx,hint) ->
530 let str = TH.str_of_tok t in
531 (match t with
532 | Parser_c.TCommentSpace _
533 (* patch: coccinelle *)
534 | Parser_c.TCommentNewline _ -> ()
535
536 | Parser_c.TComment _
537 | Parser_c.TCommentCpp (Token_c.CppAttr, _)
538 | Parser_c.TCommentCpp (Token_c.CppMacro, _)
539 | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
540 | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
541 pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
542 (TH.line_of_tok t) str)
543 | _ -> raise (Impossible 137)
544 );
545 T2 (t, Min adj, idx, hint)
546 (* patch: coccinelle *)
547 | T2 (t, Min adj, idx, hint) as x -> x
548 | Fake2 _ as x -> x
549 | _ -> raise (Impossible 138)
550
551 (* don't touch ifdefs, done after *)
552 let set_minus_comment_or_plus adj = function
553 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
554 | Unindent_cocci2 _ | EatSpace2 as x -> x
555 | x -> set_minus_comment adj x
556
557 let is_minus = function
558 | T2 (_, Min _, _, _) -> true
559 | _ -> false
560
561 let drop_minus xs =
562 xs +> exclude is_minus
563
564 let drop_expanded xs =
565 xs +> exclude (function
566 | T2 (t,_,_,_) when TH.is_expanded t -> true
567 | _ -> false
568 )
569
570 let drop_fake xs =
571 xs +> exclude (function
572 | Fake2 _ -> true
573 | _ -> false
574 )
575
576 let remove_minus_and_between_and_expanded_and_fake xs =
577
578 (* get rid of expanded tok *)
579 let xs = drop_expanded xs in
580
581 let minus_or_comment x =
582 is_minus x or is_minusable_comment x in
583
584 let minus_or_comment_nocpp x =
585 is_minus x or is_minusable_comment_nocpp x in
586
587 let common_adj (index1,adj1) (index2,adj2) =
588 let same_adj = (* same adjacency info *)
589 match (adj1,adj2) with
590 | (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
591 | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
592 same_adj &&
593 (* non-empty intersection of witness trees *)
594 not ((inter_set index1 index2) = []) in
595
596 (* new idea: collects regions not containing non-space context code
597 if two adjacent adjacent minus tokens satisfy common_adj then delete
598 all spaces, comments etc between them
599 if two adjacent minus tokens do not satisfy common_adj only delete
600 the spaces between them if there are no comments, etc.
601 if the region contain no plus code and is both preceded and followed
602 by a newline, delete the initial newline. *)
603
604 let rec adjust_around_minus = function
605 | [] -> []
606 | (T2(Parser_c.TCommentNewline c,_b,_i,_h) as x)::
607 ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
608 let (minus_list,rest) = span not_context (t1::xs) in
609 let contains_plus = List.exists is_plus minus_list in
610 let x =
611 match List.rev minus_list with
612 | (T2(Parser_c.TCommentNewline c,_b,_i,_h))::rest
613 when List.for_all minus_or_comment minus_list ->
614 set_minus_comment_or_plus adj1 x
615 | _ -> x in
616 x :: adjust_within_minus contains_plus minus_list
617 @ adjust_around_minus rest
618 | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
619 let (minus_list,rest) = span not_context (t1::xs) in
620 let contains_plus = List.exists is_plus minus_list in
621 adjust_within_minus contains_plus minus_list
622 @ adjust_around_minus rest
623 | x::xs ->
624 x :: adjust_around_minus xs
625 and adjust_within_minus cp (* contains plus *) = function
626 | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
627 let not_minus = function T2(_,Min _,_,_) -> false | _ -> true in
628 let (not_minus_list,rest) = span not_minus xs in
629 t1 ::
630 (match rest with
631 | ((Fake2(Min adj2) | T2(_,Min adj2,_,_)) as t2)::xs ->
632 if common_adj adj1 adj2
633 || not cp && List.for_all is_whitespace not_minus_list
634 then
635 (List.map (set_minus_comment_or_plus adj1) not_minus_list)
636 @ (adjust_within_minus cp (t2::xs))
637 else
638 not_minus_list
639 @ (adjust_within_minus cp (t2::xs))
640 | _ ->
641 if cp
642 then xs
643 else
644 (* remove spaces after removed stuff, eg a comma after a
645 function argument *)
646 (let (spaces,rest) = span is_space xs in
647 (List.map (set_minus_comment_or_plus adj1) spaces)
648 @ rest)
649 )
650 | xs -> failwith "should always start with minus"
651 and not_context = function
652 | (T2(_,Ctx,_,_) as x) when not (is_minusable_comment x) -> false
653 | _ -> true
654 and is_plus = function
655 | C2 _ | Comma _ | Cocci2 _ -> true
656 | _ -> false in
657
658 let xs = adjust_around_minus xs in
659
660 (* get rid of fake tok *)
661 let xs = drop_fake xs in
662
663 (* this drops blank lines after a brace introduced by removing code *)
664 let minus_or_comment_nonl = function
665 | T2(_,Min adj,_,_) -> true
666 | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> false
667 | x -> is_minusable_comment x in
668
669 let rec adjust_after_brace = function
670 | [] -> []
671 | ((T2(_,Ctx,_,_)) as x)::((T2(_,Min adj,_,_)::_) as xs)
672 when str_of_token2 x =$= "{" ->
673 let (between_minus,rest) = span minus_or_comment_nonl xs in
674 let (newlines,rest) = span is_whitespace rest in
675 let (drop_newlines,last_newline) =
676 let rec loop = function
677 | [] -> ([],[])
678 | ((T2(Parser_c.TCommentNewline _,_b,_i,_h)) as x) :: rest ->
679 (List.rev rest,[x])
680 | x::xs ->
681 let (drop_newlines,last_newline) = loop xs in
682 (drop_newlines,x::last_newline) in
683 loop (List.rev newlines) in
684 x :: between_minus
685 @ List.map (set_minus_comment adj) drop_newlines
686 @ last_newline
687 @ adjust_after_brace rest
688 | x::xs -> x :: (adjust_after_brace xs) in
689
690 let xs = adjust_after_brace xs in
691
692 (* search backwards from context } over spaces until reaching a newline.
693 then go back over all minus code until reaching some context or + code.
694 get rid of all intervening spaces, newlines, and comments
695 input is reversed *)
696 let rec adjust_before_brace = function
697 | [] -> []
698 | ((T2(t,Ctx,_,_)) as x)::xs
699 when str_of_token2 x =$= "}" or is_newline x ->
700 let (outer_spaces,rest) = span is_space xs in
701 x :: outer_spaces @
702 (match rest with
703 | ((T2 (Parser_c.TCommentNewline _,Ctx,_i,_h)) as h) ::
704 (* the rest of this code is the same as from_newline below
705 but merging them seems to be error prone... *)
706 ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
707 let (spaces,rest) = span minus_or_comment_nocpp rest in
708 h :: m ::
709 (List.map (set_minus_comment adj) spaces) @
710 (adjust_before_brace rest)
711 | _ -> adjust_before_brace rest
712 )
713 | x::xs -> x :: (adjust_before_brace xs) in
714
715 let from_newline = function
716 | ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
717 let (spaces,rest) = span minus_or_comment_nocpp rest in
718 m ::
719 (List.map (set_minus_comment adj) spaces) @
720 (adjust_before_brace rest)
721 | ((T2 (t0,Ctx, idx0,h0)) as m0) ::
722 ((T2 (t,Min adj,idx,h)) as m) :: rest
723 when TH.str_of_tok t0 = "" ->
724 (* This is for the case of a #define that is completely deleted,
725 because a #define has a strange EOL token at the end.
726 We hope there i no other kind of token that is represented by
727 "", but it seems like changing the kind of token might break
728 the end of entity recognition in the C parser.
729 See parsing_hacks.ml *)
730 let (spaces,rest) = span minus_or_comment_nocpp rest in
731 m0 :: m ::
732 (List.map (set_minus_comment adj) spaces) @
733 (adjust_before_brace rest)
734 | rest -> adjust_before_brace rest in
735
736 let xs = List.rev (from_newline (List.rev xs)) in
737
738 let cleanup_ifdefs toks =
739 (* TODO: these functions are horrid, but using tokens caused circularity *)
740 let is_ifdef = function
741 | T2((Parser_c.TCommentCpp
742 (Token_c.CppIfDirective Token_c.IfDef, _)),m,idx,_) -> true
743 | T2((Parser_c.TCommentCpp
744 (Token_c.CppIfDirective Token_c.IfDef0, _)),m,idx,_) -> true
745 | t -> false in
746 let is_else = function
747 | T2((Parser_c.TCommentCpp
748 (Token_c.CppIfDirective Token_c.Else, _)),m,idx,_) -> true
749 | _ -> false in
750 let is_endif = function
751 | T2((Parser_c.TCommentCpp
752 (Token_c.CppIfDirective Token_c.Endif, _)),m,idx,_) -> true
753 | _ -> false in
754 let add t = function
755 | l::rest -> (t::l)::rest
756 | _ -> failwith "not possible" in
757 let rec parse_ifdef acc_keywords acc_code stack = function
758 | [] -> (None,acc_keywords,acc_code)
759 | t::rest when is_else t ->
760 (match stack with
761 | [] -> parse_ifdef (t::acc_keywords) ([]::acc_code) stack rest
762 | _ -> parse_ifdef acc_keywords (add t acc_code) stack rest
763 )
764 | t::rest when is_endif t ->
765 (match stack with
766 | [] -> ((Some (t,rest)),acc_keywords,acc_code)
767 | _::stack -> parse_ifdef acc_keywords (add t acc_code) stack rest
768 )
769 | t::rest when is_ifdef t ->
770 parse_ifdef acc_keywords (add t acc_code) (()::stack) rest
771 | t::rest -> parse_ifdef acc_keywords (add t acc_code) stack rest in
772 let unminus = function
773 | T2 (t,Min adj,idx,hint) -> T2 (t,Ctx,idx,hint)
774 | x -> x in
775 let rec loop = function
776 | [] -> []
777 | t::rest when is_ifdef t ->
778 let (ender,acc_keywords,acc_code) =
779 parse_ifdef [t] [[]] [] rest in
780 let acc_code = List.map loop acc_code in
781 let merge = (* args reversed *)
782 List.fold_left2
783 (fun prev kwd code -> kwd :: (List.rev code) @ prev)
784 [] in
785 (match ender with
786 | None -> merge (List.map unminus acc_keywords) acc_code
787 | Some(endif,rest) ->
788 let rest = loop rest in
789 if List.for_all is_minus (endif :: acc_keywords)
790 then (merge acc_keywords acc_code) @ (endif :: rest)
791 else
792 (merge (List.map unminus acc_keywords) acc_code) @
793 ((unminus endif) :: rest)
794 )
795 | x::xs -> x :: loop xs in
796 loop toks in
797
798 let xs = cleanup_ifdefs xs in
799 let xs = drop_minus xs in
800 xs
801
802 (* things that should not be followed by space - boundary between SmPL
803 code and C code *)
804 let adjust_eat_space toks =
805 let rec loop = function
806 | [] -> []
807 | EatSpace2 :: x :: rest when is_space x -> loop rest
808 | EatSpace2 :: rest -> loop rest
809 | x :: xs -> x :: loop xs in
810 loop toks
811
812 (* normally, in C code, a semicolon is not preceded by a space or newline *)
813 let adjust_before_semicolon toks =
814 let toks = List.rev toks in
815 let rec search_semic = function
816 | [] -> []
817 | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
818 when List.mem (str_of_token2 x) [";";")";","] ->
819 x :: search_semic (search_minus false xs)
820 | x::xs -> x :: search_semic xs
821 and search_minus seen_minus xs =
822 let (spaces, rest) = span is_space xs in
823 (* only delete spaces if something is actually deleted *)
824 match rest with
825 | ((T2(_,Min _,_,_)) as a)::rerest -> a :: search_minus true rerest
826 | _ -> if seen_minus then rest else xs in
827 List.rev (search_semic toks)
828
829 (* normally, in C code, a ( is not followed by a space or newline *)
830 let adjust_after_paren toks =
831 let rec search_paren = function
832 | [] -> []
833 | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
834 when List.mem (str_of_token2 x) ["("] (* other things? *) ->
835 x :: search_paren (search_minus false xs)
836 | x::xs -> x :: search_paren xs
837 and search_minus seen_minus xs =
838 let (spaces, rest) = span is_whitespace xs in
839 (* only delete spaces if something is actually deleted *)
840 match rest with
841 | ((T2(_,Min _,_,_)) as a)::rerest -> (* minus *)
842 a :: search_minus true rerest
843 | ((T2(_,Ctx,_,_)) as a)::rerest
844 when seen_minus && str_of_token2 a = "," ->
845 (* comma after ( will be deleted, so consider it as minus code
846 already *)
847 a :: search_minus true rerest
848 | _ -> if seen_minus then rest else xs in (* drop trailing space *)
849 search_paren toks
850
851 (* this is for the case where braces are added around an if branch *)
852 let paren_then_brace toks =
853 let rec search_paren = function
854 | [] -> []
855 | ((T2(_,Ctx,_,_)) as x)::xs
856 when List.mem (str_of_token2 x) [")"] ->
857 x :: search_paren (search_plus xs)
858 | x::xs -> x :: search_paren xs
859 and search_plus xs =
860 let (spaces, rest) = span is_space xs in
861 let (nls, rest) = span is_newline rest in
862 match rest with
863 (* move the brace up to the previous line *)
864 | ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) ->
865 spaces @ x :: nls @ rest
866 | _ -> xs in
867 search_paren toks
868
869 let is_ident_like s = s ==~ regexp_alpha
870
871 let rec drop_space_at_endline = function
872 | [] -> []
873 | [x] -> [x]
874 | (C2 " ") ::
875 ((((T2(Parser_c.TCommentSpace _,Ctx,_,_)) | Cocci2("\n",_,_,_,_) |
876 (T2(Parser_c.TCommentNewline _,Ctx,_,_))) :: _) as rest) ->
877 (* when unparse_cocci doesn't know whether space is needed *)
878 drop_space_at_endline rest
879 | ((T2(Parser_c.TCommentSpace _,Ctx,_i,_h)) as a)::rest ->
880 let (outer_spaces,rest) = span is_space rest in
881 let minus_or_comment_or_space_nocpp = function
882 | T2(_,Min adj,_,_) -> true
883 | (T2(Parser_c.TCommentSpace _,Ctx,_i,_)) -> true
884 | (T2(Parser_c.TCommentNewline _,Ctx,_i,_)) -> false
885 | x -> false in
886 let (minus,rest) = span minus_or_comment_or_space_nocpp rest in
887 let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
888 if List.exists is_minus minus
889 then
890 match rest with
891 | ((T2(Parser_c.TCommentNewline _,Ctx,_i,_h)) as a)::rest ->
892 (* drop trailing spaces *)
893 minus @ a :: (drop_space_at_endline rest)
894 | _ -> fail ()
895 else fail ()
896 | a :: rest ->
897 a :: drop_space_at_endline rest
898
899 (* if a removed ( is between two tokens, then add a space *)
900 let rec paren_to_space = function
901 | [] -> []
902 | [x] -> [x]
903 | [x;y] -> [x;y]
904 | ((T2(_,Ctx,_,_)) as a)::
905 ((T2(t,Min _,_,_)) as b)::
906 ((T2(_,Ctx,_,_)) as c)::rest
907 when not (is_whitespace a) && TH.str_of_tok t = "(" ->
908 a :: b :: (C2 " ") :: (paren_to_space (c :: rest))
909 | a :: rest -> a :: (paren_to_space rest)
910
911 let rec add_space xs =
912 match xs with
913 | [] -> []
914 | [x] -> [x]
915 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
916 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
917 not (lnx = -1) && not (rcolx = -1) && lnx = lny && rcolx < lcoly ->
918 (* this only works within a line. could consider whether
919 something should be done to add newlines too, rather than
920 printing them explicitly in unparse_cocci. *)
921 x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
922 | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
923 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
924 not (lnx = -1) && not (rcolx = -1) && lnx < lny ->
925 (* this only works within a line. could consider whether
926 something should be done to add newlines too, rather than
927 printing them explicitly in unparse_cocci. *)
928 x::C2 (String.make (lny-lnx) '\n')::
929 C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *)
930 add_space (y::xs)
931 | ((T2(_,Ctx,_,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
932 let sx = str_of_token2 x in
933 let sy = str_of_token2 y in
934 if is_ident_like sx && (is_ident_like sy or List.mem sy ["="])
935 then x::C2 " "::(add_space (y::xs))
936 else x::(add_space (y::xs))
937 | x::y::xs -> (* not boundary, not sure if it is possible *)
938 let sx = str_of_token2 x in
939 let sy = str_of_token2 y in
940 if is_ident_like sx && is_ident_like sy
941 then x::C2 " "::(add_space (y::xs))
942 else x::(add_space (y::xs))
943
944 (* A fake comma is added at the end of an unordered initlist or a enum
945 decl, if the initlist or enum doesn't already end in a comma. This is only
946 needed if there is + code, ie if we see Cocci after it in the code sequence *)
947
948 let rec drop_end_comma = function
949 | [] -> []
950 | [x] -> [x]
951 | ((Comma ",") as x) :: rest ->
952 let (newlines,rest2) = span is_whitespace rest in
953 (match rest2 with
954 | (Cocci2 _) :: _ -> x :: drop_end_comma rest
955 | _ -> drop_end_comma rest
956 )
957 | x :: xs -> x :: drop_end_comma xs
958
959 (* The following only works for the outermost function call. Stack records
960 the column of all open parentheses. Space_cell contains the most recent
961 comma in the outermost function call. The goal is to decide whether this
962 should be followed by a space or a newline and indent. *)
963 let add_newlines toks tabbing_unit =
964 (* the following is for strings that may contain newline or tabs *)
965 let string_length s count =
966 let l = list_of_string s in
967 List.fold_left
968 (function count ->
969 function
970 | '\t' -> count + 8
971 | '\n' -> 0
972 | c -> count + 1)
973 count l in
974 let create_indent n =
975 let (tu,tlen) =
976 match tabbing_unit with
977 | Some ("\t",_) -> ("\t",8)
978 | Some ("",_) -> ("\t",8) (* not sure why... *)
979 | Some (s,_) -> (s,string_length s 0) (* assuming only tabs or spaces *)
980 | None -> ("\t",8) in
981 let rec loop seen =
982 if seen + tlen <= n
983 then tu ^ loop (seen + tlen)
984 else String.make (n-seen) ' ' in
985 loop 0 in
986 let check_for_newline count x = function
987 | Some (start,space_cell) when count > Flag_parsing_c.max_width ->
988 space_cell := "\n"^(create_indent x);
989 Some (x + (count - start))
990 | _ -> None in
991 let start_box stack space_cell count s =
992 let count = string_length s count in
993 (count,count::stack,space_cell) in
994 let end_box stack space_cell count s =
995 (* this assumes that start_box and end_box are matched, but this is not
996 necessarily the case, if ( is modified and ) is context code *)
997 let count = string_length s count in
998 match stack with
999 | [x] ->
1000 (match check_for_newline count x space_cell with
1001 | Some count -> (count,[],None)
1002 | None -> (count,[],None)
1003 )
1004 | [] -> (count,stack,space_cell)
1005 | _ -> (count,List.tl stack,space_cell) in
1006 let rec loop ((stack,space_cell) as info) count = function
1007 | [] -> []
1008 | ((Cocci2(s,line,lcol,rcol,hint)) as a):: (* hint can't be start *)
1009 (T2(commatok,Ctx,idx,_))::
1010 (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,_,_)) ::
1011 (((T2(codetok,Ctx,_,_)) :: _) as xs)
1012 when
1013 (TH.str_of_tok commatok) = "," &&
1014 (TH.str_of_tok sptok) = " " &&
1015 ((List.length stack = 1) or (* not super elegant... *)
1016 (* check if the Cocci2 token is a ), need double treatment *)
1017 (List.length stack = 2) && (hint = Some Unparse_cocci.EndBox)) ->
1018 (* something large added before a comma *)
1019 let stack = (* do the work of end_box in the length 2 case *)
1020 if List.length stack = 2
1021 then List.tl stack
1022 else stack in
1023 let x = List.hd stack in
1024 let sp = ref " " in
1025 let cocci_count = string_length s count in
1026 let space_cell = Some (cocci_count+1,sp) in (* count before space *)
1027 let newcount = cocci_count + 2 in (* count incuding space *)
1028 let future_count =
1029 string_length (TH.str_of_tok codetok) newcount in
1030 let b = T2(commatok,Ctx,idx,
1031 Some (Unparse_cocci.SpaceOrNewline sp)) in
1032 (match check_for_newline future_count x space_cell with
1033 | Some count -> a :: b :: loop (stack,Some (x,sp)) count xs
1034 | None -> a :: b :: loop (stack,Some (newcount,sp)) newcount xs
1035 )
1036 | (T2(commatok,Ctx,_,_))::
1037 (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,idx,_)) ::
1038 (((Cocci2(s,line,lcol,rcol,hint))::_) as xs)
1039 when (TH.str_of_tok commatok) = "," && (TH.str_of_tok sptok) = " " &&
1040 List.length stack = 1 (* not super elegant... *) ->
1041 (* something large added after a comma *)
1042 let x = List.hd stack in
1043 let sp = ref " " in
1044 let space_cell = Some (count+1,sp) in (* count before space *)
1045 let newcount = count + 2 in (* count incuding space *)
1046 let future_count = string_length s newcount in
1047 let a = T2(commatok,Ctx,idx,
1048 Some (Unparse_cocci.SpaceOrNewline sp)) in
1049 (match check_for_newline future_count x space_cell with
1050 | Some count -> a :: loop (stack,Some (x,sp)) count xs
1051 | None -> a :: loop (stack,Some (newcount,sp)) newcount xs
1052 )
1053 | ((T2(tok,Ctx,idx,_)) as a)::xs ->
1054 (* let (stack,space_cell) = info in *)
1055 (match TH.str_of_tok tok with
1056 | "=" as s ->
1057 let (spaces,rest) = span is_space xs in
1058 (match rest with
1059 | ((T2(tok,Ctx,_,_)) as b)::ixs ->
1060 (match TH.str_of_tok tok with
1061 | "{" ->
1062 let newcount =
1063 List.fold_left
1064 (function prev ->
1065 function
1066 | (T2(tok,_b,_i,_h)) ->
1067 string_length (TH.str_of_tok tok) prev
1068 | _ -> failwith "not possible")
1069 count spaces in
1070 let front = a :: spaces @ [b] in
1071 let (newcount,newstack,newspacecell) =
1072 start_box stack space_cell newcount "{" in
1073 front @ loop (newstack,newspacecell) newcount ixs
1074 | s -> a :: loop info (string_length s count) xs
1075 )
1076 | _ -> a :: loop info (string_length s count) xs
1077 )
1078 | "(" as s ->
1079 let (newcount,newstack,newspacecell) =
1080 start_box stack space_cell count s in
1081 a :: loop (newstack,newspacecell) newcount xs
1082 | ")" as s ->
1083 let (newcount,newstack,newspacecell) =
1084 end_box stack space_cell count s in
1085 a :: loop (newstack,newspacecell) newcount xs
1086 | "{" as s when not (stack = []) ->
1087 (* [] case means statement braces *)
1088 let (newcount,newstack,newspacecell) =
1089 start_box stack space_cell count s in
1090 a :: loop (newstack,newspacecell) newcount xs
1091 | "}" as s when not (stack = []) ->
1092 (* [] case means statement braces *)
1093 let (newcount,newstack,newspacecell) =
1094 end_box stack space_cell count s in
1095 a :: loop (newstack,newspacecell) newcount xs
1096 | s -> a :: loop info (string_length s count) xs
1097 )
1098 | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
1099 let (stack,space_cell) = info in
1100 let rest =
1101 match hint with
1102 | None -> loop info (string_length s count) xs
1103 | Some Unparse_cocci.StartBox ->
1104 let (newcount,newstack,newspacecell) =
1105 start_box stack space_cell count s in
1106 loop (newstack,newspacecell) newcount xs
1107 | Some Unparse_cocci.EndBox ->
1108 let (newcount,newstack,newspacecell) =
1109 end_box stack space_cell count s in
1110 loop (newstack,newspacecell) newcount xs
1111 | Some (Unparse_cocci.SpaceOrNewline sp) ->
1112 let count = string_length s (count + 1 (*space*)) in
1113 (match stack with
1114 | [x] ->
1115 (match check_for_newline count x space_cell with
1116 | Some count -> loop (stack,Some (x,sp)) count xs
1117 | None -> loop (stack,Some (count,sp)) count xs
1118 )
1119 | _ -> loop info count xs
1120 ) in
1121 a :: rest
1122 | ((T2(tok,_,_,_)) as a)::xs ->
1123 a :: loop info (string_length (TH.str_of_tok tok) count) xs
1124 | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
1125 | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
1126 | Fake2 _ :: _ | Indent_cocci2 :: _
1127 | Unindent_cocci2 _::_ | EatSpace2::_ ->
1128 failwith "unexpected fake, indent, unindent, or eatspace" in
1129 let redo_spaces prev = function
1130 | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1131 C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
1132 | T2(tok,min,idx,Some (Unparse_cocci.SpaceOrNewline sp)) ->
1133 C2 !sp :: T2(tok,min,idx,None) :: prev
1134 | t -> t::prev in
1135 (match !Flag_parsing_c.spacing with
1136 | Flag_parsing_c.SMPL -> toks
1137 | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))
1138 )
1139
1140 (* When insert some new code, because of a + in a SP, we must add this
1141 * code at the right place, with the good indentation. So each time we
1142 * encounter some spacing info, with some newline, we maintain the
1143 * current indentation level used.
1144 *
1145 * TODO problems: not accurate. ex: TODO
1146 *
1147 * TODO: if in #define region, should add a \ \n
1148 *)
1149 let new_tabbing2 space =
1150 list_of_string space
1151 +> List.rev
1152 +> take_until (fun c -> c =<= '\n')
1153 +> List.rev
1154 +> List.map string_of_char
1155 +> String.concat ""
1156
1157 let new_tabbing a =
1158 profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
1159
1160
1161 let rec adjust_indentation xs =
1162
1163 let _current_tabbing = ref ([] : string list) in
1164 let tabbing_unit = ref None in
1165
1166 let string_of_list l = String.concat "" (List.map string_of_char l) in
1167
1168 (* try to pick a tabbing unit for the plus code *)
1169 let adjust_tabbing_unit old_tab new_tab =
1170 if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
1171 then
1172 let old_tab = list_of_string old_tab in
1173 let new_tab = list_of_string new_tab in
1174 let rec loop = function
1175 | ([],new_tab) ->
1176 tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
1177 | (_,[]) -> failwith "not possible"
1178 | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
1179 loop (old_tab,new_tab) in
1180
1181 (*
1182 let remtab tu current_tab =
1183 let current_tab = List.rev(list_of_string current_tab) in
1184 let rec loop = function
1185 ([],new_tab) -> string_of_list (List.rev new_tab)
1186 | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
1187 ""
1188 | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
1189 | (_,ns) -> (-* mismatch; remove what we can *-)
1190 string_of_list (List.rev ns) in
1191 loop (tu,current_tab) in
1192 *)
1193
1194 let rec find_first_tab started = function
1195 | [] -> ()
1196 | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1197 find_first_tab true xs
1198 (* patch: coccinelle *)
1199 | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::_
1200 when started ->
1201 let s = str_of_token2 x +> new_tabbing in
1202 tabbing_unit := Some (s,List.rev (list_of_string s))
1203 | x::xs -> find_first_tab started xs in
1204 find_first_tab false xs;
1205
1206 let rec balanced ct = function
1207 | [] -> ct >= 0
1208 | ((T2(tok,_,_,_)) as x)::xs ->
1209 (match str_of_token2 x with
1210 | "(" -> balanced (ct+1) xs
1211 | ")" -> balanced (ct-1) xs
1212 | _ -> balanced ct xs
1213 )
1214 | x::xs -> balanced ct xs in
1215
1216 let update_tabbing started s x =
1217 let old_tabbing = !_current_tabbing in
1218 str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
1219 (* only trust the indentation after the first { *)
1220 if started
1221 then
1222 adjust_tabbing_unit
1223 (String.concat "" old_tabbing)
1224 (String.concat "" !_current_tabbing) in
1225
1226 let rec aux started xs =
1227 match xs with
1228 | [] -> []
1229 (* patch: coccinelle *)
1230 | ((T2 (Parser_c.TCommentNewline s,_,_,_)) as x)::
1231 Unindent_cocci2(false)::xs ->
1232 update_tabbing started s x;
1233 (C2 "\n")::aux started xs
1234 | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
1235 (C2 "\n")::aux started xs
1236 | ((T2 (tok,_,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _, _))::
1237 ((Cocci2 ("{",_,_,_,_)) as a)::xs
1238 when started && str_of_token2 x =$= ")" ->
1239 (* to be done for if, etc, but not for a function header *)
1240 x::(C2 " ")::a::(aux started xs)
1241 | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::xs
1242 when
1243 balanced 0 (fst(span (function x -> not(is_newline x)) xs)) ->
1244 update_tabbing started s x;
1245 let coccis_rest = span all_coccis xs in
1246 (match coccis_rest with
1247 | (_::_,((T2 (tok,_,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
1248 (* the case where cocci code has been added before a close } *)
1249 x::aux started (Indent_cocci2::xs)
1250 | _ -> x::aux started xs
1251 )
1252 | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
1253 when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
1254 let tu = String.make (lcoly-1) ' ' in
1255 _current_tabbing := tu::(!_current_tabbing);
1256 C2 (tu)::aux started (y::xs)
1257 | Indent_cocci2::xs ->
1258 (match !tabbing_unit with
1259 | None -> aux started xs
1260 | Some (tu,_) ->
1261 _current_tabbing := tu::(!_current_tabbing);
1262 (* can't be C2, for later phases *)
1263 Cocci2 (tu,-1,-1,-1,None)::aux started xs
1264 )
1265 | Unindent_cocci2(permanent)::((Cocci2("\n",_,_,_,_)) as x)::xs ->
1266 (* seems only relevant if there is a following cocci newline *)
1267 (match !_current_tabbing with
1268 | [] -> aux started xs
1269 | _::new_tabbing ->
1270 let s = String.concat "" new_tabbing in
1271 _current_tabbing := new_tabbing;
1272 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1273 )
1274 | Unindent_cocci2(permanent)::xs -> aux started xs
1275 (* border between existing code and cocci code *)
1276 | ((T2 (tok,_,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
1277 when str_of_token2 x =$= "{" ->
1278 x::aux true (y::Indent_cocci2::xs)
1279 | ((Cocci2 _) as x)::((T2 (tok,_,_,_)) as y)::xs
1280 when str_of_token2 y =$= "}" ->
1281 x::aux started (Unindent_cocci2 true::y::xs)
1282 (* starting the body of the function *)
1283 | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
1284 x::aux true xs
1285 | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
1286 | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
1287 (* dont inline in expr because of weird eval order of ocaml *)
1288 let s = String.concat "" !_current_tabbing in
1289 (* can't be C2, for later phases *)
1290 x::Cocci2 (s,-1,-1,-1,None)::aux started xs
1291 | x::xs -> x::aux started xs in
1292 (aux false xs,!tabbing_unit)
1293
1294
1295 let rec find_paren_comma = function
1296 | [] -> ()
1297
1298 (* do nothing if was like this in original file *)
1299 | { str = "("; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs)
1300 | { str = ","; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs)
1301 | { str = ","; idx = Some p1 } :: ({ str = ")"; idx = Some p2} :: _ as xs)
1302 when p2 =|= p1 + 1 ->
1303 find_paren_comma xs
1304
1305 (* otherwise yes can adjust *)
1306 | { str = "(" } :: (({ str = ","} as rem) :: _ as xs)
1307 | ({ str = "," } as rem) :: ({ str = ","} :: _ as xs)
1308 | ({ str = "," } as rem) :: ({ str = ")"} :: _ as xs) ->
1309 rem.remove <- true;
1310 find_paren_comma xs
1311
1312 | x::xs ->
1313 find_paren_comma xs
1314
1315
1316 let fix_tokens toks =
1317 let toks = toks +> List.map mk_token_extended in
1318
1319 let cleaner = toks +> exclude (function
1320 | {tok2 = T2 (t,_,_,_)} -> TH.is_real_comment t (* I want the ifdef *)
1321 | _ -> false
1322 ) in
1323 find_paren_comma cleaner;
1324
1325 let toks = rebuild_tokens_extented toks in
1326 toks +> List.map (fun x -> x.tok2)
1327
1328 (* if we have to remove a '}' that is alone on a line, remove the line too *)
1329 let drop_line toks =
1330 let rec space_until_newline toks =
1331 match toks with
1332 | (T2(_, Min _, _, _) as hd) :: tl ->
1333 let (drop, tl) = space_until_newline tl in
1334 (drop, hd :: tl)
1335 | hd :: tl when is_space hd ->
1336 space_until_newline tl
1337 | Fake2 _ :: tl ->
1338 space_until_newline tl
1339 | hd :: tl when is_newline hd ->
1340 (true, toks)
1341 | _ ->
1342 (false, toks) in
1343 let rec loop toks =
1344 match toks with
1345 | (T2(_, Min _, _, _) as x) :: tl
1346 when str_of_token2 x =$= "}" ->
1347 let (drop, tl) = space_until_newline tl in
1348 (drop, x :: tl)
1349 | hd :: tl when is_whitespace hd ->
1350 let (drop, tl) = loop tl in
1351 if drop then
1352 (true, tl)
1353 else
1354 (false, toks)
1355 | _ -> (false, toks) in
1356 let rec find toks =
1357 let (_, toks) = loop toks in
1358 match toks with
1359 | [] -> []
1360 | hd :: tl ->
1361 hd :: find tl in
1362 find toks
1363
1364 (*****************************************************************************)
1365 (* Final unparsing (and debugging support) *)
1366 (*****************************************************************************)
1367
1368 (* for debugging *)
1369 type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin
1370
1371 let kind_of_token2 = function
1372 | Fake2 _ -> KFake
1373 | Cocci2 _ -> KCocci
1374 | C2 _ -> KC
1375 | Comma _ -> KC
1376 | T2 (t,_,_,_) ->
1377 (match TH.pinfo_of_tok t with
1378 | Ast_c.ExpandedTok _ -> KExpanded
1379 | Ast_c.OriginTok _ -> KOrigin
1380 | Ast_c.FakeTok _ -> raise (Impossible 139) (* now a Fake2 *)
1381 | Ast_c.AbstractLineTok _ -> raise (Impossible 140) (* now a KC *)
1382 )
1383 | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise (Impossible 141)
1384
1385 let end_mark = "!"
1386
1387 let start_mark = function
1388 | KFake -> "!F!"
1389 | KCocci -> "!S!"
1390 | KC -> "!A!"
1391 | KExpanded -> "!E!"
1392 | KOrigin -> ""
1393
1394 let print_all_tokens2 pr xs =
1395 if !Flag_parsing_c.debug_unparsing
1396 then
1397 let current_kind = ref KOrigin in
1398 xs +> List.iter (fun t ->
1399 let newkind = kind_of_token2 t in
1400 if newkind =*= !current_kind
1401 then pr (str_of_token2 t)
1402 else
1403 begin
1404 pr (end_mark);
1405 pr (start_mark newkind);
1406 pr (str_of_token2 t);
1407 current_kind := newkind
1408 end
1409 );
1410 else
1411 let to_whitespace s =
1412 let r = String.copy s in
1413 for i = 1 to String.length r do
1414 let c = String.get r (i-1) in
1415 match c with
1416 | ' ' | '\t' | '\r' | '\n' -> ()
1417 | _ -> String.set r (i-1) ' '
1418 done;
1419 r in
1420 let hiding_level = ref 0 in
1421 let handle_token t =
1422 let s = str_of_token2 t in
1423 let hide_current =
1424 match t with
1425 | T2 (t,_,_,_) ->
1426 let i = TH.info_of_tok t in
1427 (match Ast_c.get_annot_info i Token_annot.Exclude_start with
1428 | None -> ()
1429 | Some _ -> hiding_level := !hiding_level + 1
1430 );
1431 let hide_current = !hiding_level > 0 in
1432 (match Ast_c.get_annot_info i Token_annot.Exclude_end with
1433 | None -> ()
1434 | Some _ -> hiding_level := max (!hiding_level - 1) 0
1435 );
1436 hide_current
1437 | _ -> !hiding_level > 0 in
1438 if hide_current then to_whitespace s else s in
1439 xs +> List.iter (fun x -> pr (handle_token x))
1440
1441
1442
1443
1444
1445 (*****************************************************************************)
1446 (* Entry points *)
1447 (*****************************************************************************)
1448
1449 (* old: PPviatok was made in the beginning to allow to pretty print a
1450 * complete C file, including a modified C file by transformation.ml,
1451 * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and
1452 * maybe flow_to_ast) all the cases. Indeed we don't need to do some
1453 * fancy stuff when a function was not modified at all. Just need to
1454 * print the list of token as-is. But now pretty_print_c.ml handles
1455 * almost everything so maybe less useful. Maybe PPviatok allows to
1456 * optimize a little the pretty printing.
1457 *
1458 * update: now have PPviastr which goes even faster than PPviatok, so
1459 * PPviatok has disappeared.
1460 *)
1461
1462 type ppmethod = PPnormal | PPviastr
1463
1464
1465
1466
1467 (* The pp_program function will call pretty_print_c.ml with a special
1468 * function to print the leaf components, the tokens. When we want to
1469 * print a token, we need to print also maybe the space and comments that
1470 * were close to it in the original file (and that was omitted during the
1471 * parsing phase), and honor what the cocci-info attached to the token says.
1472 * Maybe we will not print the token if it's a MINUS-token, and maybe we will
1473 * print it and also print some cocci-code attached in a PLUS to it.
1474 * So we will also maybe call unparse_cocci. Because the cocci-code may
1475 * contain metavariables, unparse_cocci will in fact sometimes call back
1476 * pretty_print_c (which will this time don't call back again unparse_cocci)
1477 *)
1478
1479 let pp_program2 xs outfile =
1480 with_open_outfile outfile (fun (pr,chan) ->
1481 let pr s =
1482 if !Flag_parsing_c.debug_unparsing
1483 then begin pr2_no_nl s; flush stderr end
1484 else pr s
1485 (* flush chan; *)
1486 (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
1487 in
1488
1489 xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
1490 (* here can still work on ast *)
1491 let e = remove_useless_fakeInfo_struct e in
1492
1493 match ppmethod with
1494 | PPnormal ->
1495 (* now work on tokens *)
1496 (* phase1: just get all the tokens, all the information *)
1497 assert(toks_e +> List.for_all (fun t ->
1498 TH.is_origin t or TH.is_expanded t
1499 ));
1500 let toks = get_fakeInfo_and_tokens e toks_e in
1501 let toks = displace_fake_nodes toks in
1502 (* assert Origin;ExpandedTok;Faketok *)
1503 let toks = expand_mcode toks in
1504
1505 (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
1506 * and no tag information, just NOTHING. *)
1507
1508 let toks =
1509 if !Flag.sgrep_mode2
1510 then
1511 (* nothing else to do for sgrep *)
1512 drop_expanded(drop_fake(drop_minus toks))
1513 else
1514 begin
1515 (* phase2: can now start to filter and adjust *)
1516 let (toks,tu) = adjust_indentation toks in
1517 let toks = adjust_eat_space toks in
1518 let toks = adjust_before_semicolon toks in(*before remove minus*)
1519 let toks = adjust_after_paren toks in(*also before remove minus*)
1520 let toks = drop_space_at_endline toks in
1521 let toks = paren_to_space toks in
1522 let toks = drop_end_comma toks in
1523
1524 let toks = drop_line toks in
1525
1526 let toks = remove_minus_and_between_and_expanded_and_fake toks in
1527 (* assert Origin + Cocci + C and no minus *)
1528 let toks = add_space toks in
1529 let toks = add_newlines toks tu in
1530 let toks = paren_then_brace toks in
1531 let toks = fix_tokens toks in
1532 toks
1533 end in
1534
1535 (* in theory here could reparse and rework the ast! or
1536 * apply some SP. Not before cos julia may have generated
1537 * not parsable file. Need do unparsing_tricks call before
1538 * being ready to reparse. *)
1539 print_all_tokens2 pr toks;
1540
1541 | PPviastr -> pr str
1542 )
1543 )
1544
1545 let pp_program a b =
1546 profile_code "C unparsing" (fun () -> pp_program2 a b)
1547
1548
1549 let pp_program_default xs outfile =
1550 let xs' = xs +> List.map (fun x -> x, PPnormal) in
1551 pp_program xs' outfile