Commit | Line | Data |
---|---|---|
0708f913 C |
1 | (* Yoann Padioleau, Julia Lawall |
2 | * | |
3 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU | |
34e49164 C |
4 | * |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License (GPL) | |
7 | * version 2 as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * file license.txt for more details. | |
485bce71 C |
13 | * |
14 | * | |
15 | * Modifications by Julia Lawall for better newline handling. | |
34e49164 C |
16 | *) |
17 | open Common | |
18 | ||
19 | open Ast_c | |
20 | ||
21 | module TH = Token_helpers | |
22 | ||
23 | ||
b1b2de81 C |
24 | (* should keep comments and directives in between adjacent deleted terms, |
25 | but not comments and directives within deleted terms. should use the | |
26 | labels found in the control-flow graph *) | |
27 | ||
34e49164 C |
28 | |
29 | ||
708f4980 C |
30 | (*****************************************************************************) |
31 | (* Wrappers *) | |
32 | (*****************************************************************************) | |
33 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing | |
34 | ||
34e49164 C |
35 | (*****************************************************************************) |
36 | (* Types used during the intermediate phases of the unparsing *) | |
37 | (*****************************************************************************) | |
38 | ||
39 | type token1 = | |
40 | | Fake1 of info | |
41 | | T1 of Parser_c.token | |
42 | ||
43 | (* The cocci_tag of the token should always be a NOTHING. The mark of | |
44 | * the token can only be OriginTok or ExpandedTok. Why not get rid of | |
45 | * token and get something simpler ? because we need to know if the | |
46 | * info is a TCommentCpp or TCommentSpace, etc for some of the further | |
47 | * analysis so easier to keep with the token. | |
48 | * | |
49 | * This type contains the whole information. Have all the tokens with this | |
50 | * type. | |
51 | *) | |
708f4980 C |
52 | type min = |
53 | Min of (int list (* match numbers *) * int (* adjacency information *)) | |
54 | | Ctx | |
55 | ||
34e49164 | 56 | type token2 = |
708f4980 | 57 | | T2 of Parser_c.token * min * |
34e49164 C |
58 | int option (* orig index, abstracting away comments and space *) |
59 | | Fake2 | |
708f4980 | 60 | | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) |
34e49164 C |
61 | | C2 of string |
62 | | Indent_cocci2 | |
63 | | Unindent_cocci2 | |
64 | ||
65 | (* not used yet *) | |
66 | type token3 = | |
67 | | T3 of Parser_c.token | |
68 | | Cocci3 of string | |
69 | | C3 of string | |
70 | ||
71 | ||
72 | (* similar to the tech in parsing_hack *) | |
73 | type token_extended = { | |
74 | tok2 : token2; | |
75 | str : string; | |
76 | idx: int option; (* to know if 2 tokens were consecutive in orig file *) | |
77 | mutable new_tokens_before : token2 list; | |
78 | mutable remove : bool; | |
79 | } | |
80 | ||
81 | ||
82 | (*****************************************************************************) | |
83 | (* Helpers *) | |
84 | (*****************************************************************************) | |
85 | ||
86 | let info_of_token1 t = | |
87 | match t with | |
88 | | Fake1 info -> info | |
89 | | T1 tok -> TH.info_of_tok tok | |
90 | ||
002099fc C |
91 | let print_token1 = function |
92 | T1 tok -> TH.str_of_tok tok | |
93 | | Fake1 info -> "fake" | |
94 | ||
34e49164 C |
95 | let str_of_token2 = function |
96 | | T2 (t,_,_) -> TH.str_of_tok t | |
97 | | Fake2 -> "" | |
708f4980 | 98 | | Cocci2 (s,_,_,_) -> s |
34e49164 C |
99 | | C2 s -> s |
100 | | Indent_cocci2 -> "" | |
101 | | Unindent_cocci2 -> "" | |
102 | ||
103 | let print_token2 = function | |
708f4980 C |
104 | | T2 (t,b,_) -> |
105 | let b_str = | |
106 | match b with | |
107 | Min (index,adj) -> | |
108 | Printf.sprintf "-%d[%s]" adj | |
109 | (String.concat " " (List.map string_of_int index)) | |
110 | | Ctx -> "" in | |
111 | "T2:"^b_str^TH.str_of_tok t | |
34e49164 | 112 | | Fake2 -> "" |
708f4980 | 113 | | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s |
34e49164 C |
114 | | C2 s -> "C2:"^s |
115 | | Indent_cocci2 -> "Indent" | |
116 | | Unindent_cocci2 -> "Unindent" | |
117 | ||
002099fc C |
118 | let simple_print_all_tokens1 l = |
119 | List.iter (function x -> Printf.printf "%s " (print_token1 x)) l; | |
120 | Printf.printf "\n" | |
121 | ||
708f4980 | 122 | let simple_print_all_tokens2 l = |
34e49164 C |
123 | List.iter (function x -> Printf.printf "%s " (print_token2 x)) l; |
124 | Printf.printf "\n" | |
125 | ||
126 | let str_of_token3 = function | |
127 | | T3 t -> TH.str_of_tok t | |
128 | | Cocci3 s | C3 s -> s | |
129 | ||
130 | ||
131 | ||
132 | let mk_token_extended x = | |
133 | let origidx = | |
134 | match x with | |
135 | | T2 (_,_, idx) -> idx | |
136 | | _ -> None | |
137 | in | |
138 | { tok2 = x; | |
139 | str = str_of_token2 x; | |
140 | idx = origidx; | |
141 | new_tokens_before = []; | |
142 | remove = false; | |
143 | } | |
144 | ||
145 | let rebuild_tokens_extented toks_ext = | |
146 | let _tokens = ref [] in | |
147 | toks_ext +> List.iter (fun tok -> | |
148 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); | |
149 | if not tok.remove then push2 tok.tok2 _tokens; | |
150 | ); | |
151 | let tokens = List.rev !_tokens in | |
152 | (tokens +> List.map mk_token_extended) | |
153 | ||
154 | ||
155 | let mcode_contain_plus = function | |
156 | | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false | |
157 | | Ast_cocci.CONTEXT _ -> true | |
485bce71 | 158 | (* patch: when need full coccinelle transformation *) |
708f4980 C |
159 | | Ast_cocci.MINUS (_,_,_,[]) -> false |
160 | | Ast_cocci.MINUS (_,_,_,x::xs) -> true | |
34e49164 C |
161 | | Ast_cocci.PLUS -> raise Impossible |
162 | ||
163 | let contain_plus info = | |
164 | let mck = Ast_c.mcode_of_info info in | |
165 | mcode_contain_plus mck | |
166 | ||
167 | (*****************************************************************************) | |
168 | (* Last fix on the ast *) | |
169 | (*****************************************************************************) | |
170 | ||
171 | (* Because of the ugly trick to handle initialiser, I generate fake ',' | |
172 | * for the last initializer element, but if there is nothing around it, | |
173 | * I don't want in the end to print it. | |
174 | *) | |
175 | ||
176 | let remove_useless_fakeInfo_struct program = | |
177 | let bigf = { Visitor_c.default_visitor_c_s with | |
178 | Visitor_c.kini_s = (fun (k,bigf) ini -> | |
179 | match k ini with | |
113803cf | 180 | | InitList args, ii -> |
34e49164 C |
181 | (match ii with |
182 | | [_i1;_i2] -> ini | |
113803cf | 183 | | [i1;i2;iicommaopt] -> |
34e49164 C |
184 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) |
185 | && (Ast_c.is_fake iicommaopt) | |
186 | (* sometimes the guy put a normal iicommaopt *) | |
34e49164 C |
187 | then InitList args, [i1;i2] |
188 | else InitList args, [i1;i2;iicommaopt] | |
113803cf C |
189 | | [i1;i2;iicommaopt;end_comma_opt] -> |
190 | (* only in #define. end_comma_opt canot be fake *) | |
191 | (* not sure if this will be considered ambiguous with a previous | |
192 | case? *) | |
193 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) | |
194 | && (Ast_c.is_fake iicommaopt) | |
195 | (* sometimes the guy put a normal iicommaopt *) | |
196 | then InitList args, [i1;i2;end_comma_opt] | |
197 | else InitList args, [i1;i2;iicommaopt;end_comma_opt] | |
34e49164 C |
198 | | _ -> raise Impossible |
199 | ) | |
200 | | x -> x | |
201 | ) | |
202 | } in | |
203 | Visitor_c.vk_toplevel_s bigf program | |
204 | ||
205 | ||
206 | (*****************************************************************************) | |
207 | (* Tokens1 generation *) | |
208 | (*****************************************************************************) | |
209 | ||
210 | let get_fakeInfo_and_tokens celem toks = | |
211 | let toks_in = ref toks in | |
212 | let toks_out = ref [] in | |
213 | ||
214 | (* todo? verify good order of position ? *) | |
215 | let pr_elem info = | |
216 | match Ast_c.pinfo_of_info info with | |
217 | | FakeTok _ -> | |
218 | Common.push2 (Fake1 info) toks_out | |
219 | | OriginTok _ | ExpandedTok _ -> | |
220 | (* get the associated comments/space/cppcomment tokens *) | |
978fd7e5 C |
221 | let (before, x, after) = |
222 | !toks_in +> Common.split_when (fun tok -> | |
34e49164 C |
223 | info =*= TH.info_of_tok tok) |
224 | in | |
b1b2de81 | 225 | assert(info =*= TH.info_of_tok x); |
34e49164 C |
226 | (*old: assert(before +> List.for_all (TH.is_comment)); *) |
227 | before +> List.iter (fun x -> | |
228 | if not (TH.is_comment x) | |
0708f913 | 229 | then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) |
34e49164 C |
230 | (* case such as int asm d3("x"); not yet in ast *) |
231 | ); | |
232 | before +> List.iter (fun x -> Common.push2 (T1 x) toks_out); | |
233 | push2 (T1 x) toks_out; | |
234 | toks_in := after; | |
235 | | AbstractLineTok _ -> | |
236 | (* can be called on type info when for instance use -type_c *) | |
237 | if !Flag_parsing_c.pretty_print_type_info | |
238 | then Common.push2 (Fake1 info) toks_out | |
239 | else raise Impossible (* at this stage *) | |
240 | in | |
241 | ||
242 | let pr_space _ = () in (* use the spacing that is there already *) | |
243 | ||
244 | Pretty_print_c.pp_program_gen pr_elem pr_space celem; | |
245 | ||
246 | if not (null !toks_in) | |
0708f913 | 247 | then failwith "WEIRD: unparsing not finished"; |
34e49164 C |
248 | |
249 | List.rev !toks_out | |
250 | ||
251 | (* Fake nodes that have BEFORE code should be moved over any subsequent | |
252 | whitespace and newlines, but not any comments, to get as close to the affected | |
002099fc | 253 | code as possible. Similarly, fake nodes that have AFTER code should be moved |
34e49164 C |
254 | backwards. No fake nodes should have both before and after code. *) |
255 | ||
256 | let displace_fake_nodes toks = | |
257 | let is_fake = function Fake1 _ -> true | _ -> false in | |
258 | let is_whitespace = function | |
259 | T1(Parser_c.TCommentSpace _) | |
485bce71 | 260 | (* patch: cocci *) |
34e49164 C |
261 | | T1(Parser_c.TCommentNewline _) -> true |
262 | | _ -> false in | |
263 | let rec loop toks = | |
264 | let fake_info = | |
265 | try Some (Common.split_when is_fake toks) | |
266 | with Not_found -> None in | |
267 | match fake_info with | |
268 | Some(bef,((Fake1 info) as fake),aft) -> | |
269 | (match !(info.cocci_tag) with | |
708f4980 C |
270 | | Some x -> |
271 | (match x with | |
002099fc | 272 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) -> |
34e49164 | 273 | (* move the fake node forwards *) |
002099fc C |
274 | let (whitespace,rest) = Common.span is_whitespace aft in |
275 | bef @ whitespace @ fake :: (loop rest) | |
276 | | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) -> | |
34e49164 | 277 | (* move the fake node backwards *) |
002099fc C |
278 | let revbef = List.rev bef in |
279 | let (revwhitespace,revprev) = Common.span is_whitespace revbef in | |
280 | let whitespace = List.rev revwhitespace in | |
281 | let prev = List.rev revprev in | |
282 | prev @ fake :: (loop (whitespace @ aft)) | |
283 | | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) -> | |
284 | bef @ fake :: (loop aft) | |
285 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) -> | |
286 | failwith "fake node should not be before-after" | |
287 | | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) | |
485bce71 | 288 | ) |
708f4980 C |
289 | | None -> |
290 | bef @ fake :: (loop aft) | |
291 | ) | |
34e49164 C |
292 | | None -> toks |
293 | | _ -> raise Impossible in | |
294 | loop toks | |
295 | ||
296 | (*****************************************************************************) | |
297 | (* Tokens2 generation *) | |
298 | (*****************************************************************************) | |
299 | ||
b1b2de81 C |
300 | let comment2t2 = function |
301 | (Token_c.TCommentCpp x,(info : Token_c.info)) -> | |
302 | C2("\n"^info.Common.str^"\n") | |
303 | | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x)) | |
304 | ||
34e49164 C |
305 | let expand_mcode toks = |
306 | let toks_out = ref [] in | |
307 | ||
308 | let index = ref 0 in | |
309 | ||
310 | let add_elem t minus = | |
311 | match t with | |
312 | | Fake1 info -> | |
313 | let str = Ast_c.str_of_info info in | |
b1b2de81 | 314 | if str =$= "" |
34e49164 C |
315 | then push2 (Fake2) toks_out |
316 | (* perhaps the fake ',' *) | |
317 | else push2 (C2 str) toks_out | |
318 | ||
319 | ||
002099fc | 320 | | T1 tok -> |
b1b2de81 | 321 | (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) |
34e49164 | 322 | (* no tag on expandedTok ! *) |
0708f913 C |
323 | (if (TH.is_expanded tok && |
324 | !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot) | |
325 | then | |
326 | failwith | |
327 | (Printf.sprintf | |
328 | "expanded token %s on line %d is either modified or stored in a metavariable" | |
329 | (TH.str_of_tok tok) (TH.line_of_tok tok))); | |
34e49164 C |
330 | |
331 | let tok' = tok +> TH.visitor_info_of_tok (fun i -> | |
332 | { i with cocci_tag = ref Ast_c.emptyAnnot; } | |
333 | ) in | |
334 | ||
335 | let optindex = | |
336 | if TH.is_origin tok && not (TH.is_real_comment tok) | |
337 | then begin | |
338 | incr index; | |
339 | Some !index | |
340 | end | |
341 | else None | |
342 | in | |
343 | ||
344 | push2 (T2 (tok', minus, optindex)) toks_out | |
345 | in | |
346 | ||
347 | let expand_info t = | |
708f4980 C |
348 | let (mcode,env) = |
349 | Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in | |
34e49164 | 350 | |
708f4980 C |
351 | let pr_cocci s ln col rcol = |
352 | push2 (Cocci2(s,ln,col,rcol)) toks_out in | |
34e49164 | 353 | let pr_c info = |
b1b2de81 C |
354 | (match Ast_c.pinfo_of_info info with |
355 | Ast_c.AbstractLineTok _ -> | |
356 | push2 (C2 (Ast_c.str_of_info info)) toks_out | |
357 | | Ast_c.FakeTok (s,_) -> | |
358 | push2 (C2 s) toks_out | |
34e49164 C |
359 | | _ -> |
360 | Printf.printf "line: %s\n" (Common.dump info); | |
b1b2de81 C |
361 | failwith "not an abstract line"); |
362 | (!(info.Ast_c.comments_tag)).Ast_c.mafter +> | |
363 | List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in | |
364 | ||
708f4980 C |
365 | let pr_barrier ln col = (* marks a position, used around C code *) |
366 | push2 (Cocci2("",ln,col,col)) toks_out in | |
367 | let pr_nobarrier ln col = () in (* not needed for linux spacing *) | |
b1b2de81 | 368 | |
708f4980 | 369 | let pr_cspace _ = push2 (C2 " ") toks_out in |
34e49164 | 370 | |
708f4980 C |
371 | let pr_space _ = () (* rely on add_space in cocci code *) in |
372 | let pr_arity _ = () (* not interested *) in | |
34e49164 C |
373 | |
374 | let indent _ = push2 Indent_cocci2 toks_out in | |
375 | let unindent _ = push2 Unindent_cocci2 toks_out in | |
376 | ||
708f4980 C |
377 | let args_pp = |
378 | (env, pr_cocci, pr_c, pr_cspace, | |
379 | (match !Flag_parsing_c.spacing with | |
380 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace), | |
381 | pr_arity, | |
382 | (match !Flag_parsing_c.spacing with | |
383 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), | |
384 | indent, unindent) in | |
34e49164 | 385 | |
485bce71 C |
386 | (* old: when for yacfe with partial cocci: |
387 | * add_elem t false; | |
388 | *) | |
34e49164 | 389 | |
485bce71 | 390 | (* patch: when need full coccinelle transformation *) |
faf9a90c | 391 | let unparser = Unparse_cocci.pp_list_list_any args_pp false in |
34e49164 | 392 | match mcode with |
708f4980 | 393 | | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> |
34e49164 C |
394 | (* Why adding ? because I want to have all the information, the whole |
395 | * set of tokens, so I can then process and remove the | |
396 | * is_between_two_minus for instance *) | |
708f4980 | 397 | add_elem t (Min (inst,adj)); |
faf9a90c | 398 | unparser any_xxs Unparse_cocci.InPlace |
34e49164 C |
399 | | Ast_cocci.CONTEXT (_,any_befaft) -> |
400 | (match any_befaft with | |
401 | | Ast_cocci.NOTHING -> | |
708f4980 | 402 | add_elem t Ctx |
34e49164 | 403 | | Ast_cocci.BEFORE xxs -> |
faf9a90c | 404 | unparser xxs Unparse_cocci.Before; |
708f4980 | 405 | add_elem t Ctx |
34e49164 | 406 | | Ast_cocci.AFTER xxs -> |
708f4980 | 407 | add_elem t Ctx; |
faf9a90c | 408 | unparser xxs Unparse_cocci.After; |
34e49164 | 409 | | Ast_cocci.BEFOREAFTER (xxs, yys) -> |
faf9a90c | 410 | unparser xxs Unparse_cocci.Before; |
708f4980 | 411 | add_elem t Ctx; |
faf9a90c | 412 | unparser yys Unparse_cocci.After; |
34e49164 C |
413 | ) |
414 | | Ast_cocci.PLUS -> raise Impossible | |
415 | ||
416 | in | |
417 | ||
418 | toks +> List.iter expand_info; | |
419 | List.rev !toks_out | |
420 | ||
421 | ||
422 | (*****************************************************************************) | |
423 | (* Tokens2 processing, filtering, adjusting *) | |
424 | (*****************************************************************************) | |
425 | ||
002099fc C |
426 | let is_space = function |
427 | | (T2 (t,_b,_i)) -> | |
428 | (match t with | |
429 | | Parser_c.TCommentSpace _ -> true (* only whitespace *) | |
430 | | _ -> false | |
431 | ) | |
432 | | _ -> false | |
433 | ||
34e49164 | 434 | let is_minusable_comment = function |
b1b2de81 | 435 | | (T2 (t,_b,_i)) -> |
34e49164 C |
436 | (match t with |
437 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
485bce71 | 438 | (* patch: coccinelle *) |
002099fc C |
439 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
440 | | Parser_c.TComment _ | |
441 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
b1b2de81 C |
442 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
443 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *) | |
34e49164 C |
444 | -> true |
445 | ||
b1b2de81 | 446 | | Parser_c.TCommentMisc _ |
0708f913 | 447 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) |
34e49164 C |
448 | -> false |
449 | ||
450 | | _ -> false | |
451 | ) | |
452 | | _ -> false | |
453 | ||
454 | let all_coccis = function | |
455 | Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true | |
456 | | _ -> false | |
457 | ||
b1b2de81 C |
458 | (*previously gave up if the first character was a newline, but not clear why*) |
459 | let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x | |
34e49164 | 460 | |
708f4980 C |
461 | let set_minus_comment adj = function |
462 | | T2 (t,Ctx,idx) -> | |
34e49164 C |
463 | let str = TH.str_of_tok t in |
464 | (match t with | |
465 | | Parser_c.TCommentSpace _ | |
485bce71 | 466 | (* patch: coccinelle *) |
34e49164 C |
467 | | Parser_c.TCommentNewline _ -> () |
468 | ||
469 | | Parser_c.TComment _ | |
0708f913 | 470 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) |
b1b2de81 C |
471 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
472 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
34e49164 | 473 | -> |
b1b2de81 C |
474 | pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" |
475 | (TH.line_of_tok t) str) | |
34e49164 C |
476 | | _ -> raise Impossible |
477 | ); | |
708f4980 | 478 | T2 (t, Min adj, idx) |
485bce71 | 479 | (* patch: coccinelle *) |
708f4980 | 480 | | T2 (Parser_c.TCommentNewline _,Min adj,idx) as x -> x |
34e49164 C |
481 | | _ -> raise Impossible |
482 | ||
708f4980 | 483 | let set_minus_comment_or_plus adj = function |
34e49164 | 484 | Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x |
708f4980 | 485 | | x -> set_minus_comment adj x |
34e49164 C |
486 | |
487 | let remove_minus_and_between_and_expanded_and_fake xs = | |
488 | ||
489 | (* get rid of exampled and fake tok *) | |
490 | let xs = xs +> Common.exclude (function | |
491 | | T2 (t,_,_) when TH.is_expanded t -> true | |
492 | | Fake2 -> true | |
493 | ||
494 | | _ -> false | |
495 | ) | |
496 | in | |
497 | ||
498 | (*This drops the space before each completely minused block (no plus code).*) | |
113803cf | 499 | let minus_or_comment = function |
708f4980 | 500 | T2(_,Min adj,_) -> true |
113803cf C |
501 | | T2(Parser_c.TCommentNewline _,_b,_i) -> false |
502 | | x -> is_minusable_comment x in | |
503 | ||
34e49164 C |
504 | let rec adjust_before_minus = function |
505 | [] -> [] | |
485bce71 | 506 | (* patch: coccinelle *) |
708f4980 C |
507 | | (T2(Parser_c.TCommentNewline c,_b,_i) as x):: |
508 | ((T2(_,Min adj,_)::_) as xs) -> | |
34e49164 C |
509 | let (between_minus,rest) = Common.span minus_or_comment xs in |
510 | (match rest with | |
708f4980 | 511 | [] -> (set_minus_comment adj x) :: between_minus |
34e49164 | 512 | | T2(Parser_c.TCommentNewline _,_b,_i)::_ -> |
708f4980 | 513 | (set_minus_comment adj x) :: between_minus @ |
34e49164 C |
514 | (adjust_before_minus rest) |
515 | | _ -> x :: between_minus @ (adjust_before_minus rest)) | |
516 | | x::xs -> x::adjust_before_minus xs in | |
517 | ||
518 | let xs = adjust_before_minus xs in | |
519 | ||
113803cf C |
520 | (* this drops blank lines after a brace introduced by removing code *) |
521 | let rec adjust_after_brace = function | |
522 | [] -> [] | |
708f4980 | 523 | | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs) |
b1b2de81 | 524 | when str_of_token2 x =$= "{" -> |
113803cf C |
525 | let (between_minus,rest) = Common.span minus_or_comment xs in |
526 | let is_whitespace = function | |
527 | T2(Parser_c.TCommentSpace _,_b,_i) | |
528 | (* patch: cocci *) | |
529 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
530 | | _ -> false in | |
531 | let (newlines,rest) = Common.span is_whitespace rest in | |
532 | let (drop_newlines,last_newline) = | |
533 | let rec loop = function | |
534 | [] -> ([],[]) | |
535 | | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest -> | |
536 | (List.rev rest,[x]) | |
537 | | x::xs -> | |
538 | let (drop_newlines,last_newline) = loop xs in | |
539 | (drop_newlines,x::last_newline) in | |
540 | loop (List.rev newlines) in | |
708f4980 | 541 | x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@ |
113803cf C |
542 | last_newline@ |
543 | adjust_after_brace rest | |
544 | | x::xs -> x::adjust_after_brace xs in | |
545 | ||
546 | let xs = adjust_after_brace xs in | |
547 | ||
34e49164 C |
548 | (* this deals with any stuff that is between the minused code, eg |
549 | spaces, comments, attributes, etc. *) | |
550 | (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus | |
551 | is because the + code can end up anywhere in the middle of the - code; | |
552 | it is not necessarily to the far left *) | |
708f4980 C |
553 | |
554 | let common_adj (index1,adj1) (index2,adj2) = | |
555 | adj1 = adj2 (* same adjacency info *) && | |
556 | (* non-empty intersection of witness trees *) | |
557 | not ((Common.inter_set index1 index2) = []) in | |
558 | ||
b1b2de81 | 559 | let rec adjust_between_minus xs = |
34e49164 C |
560 | match xs with |
561 | | [] -> [] | |
708f4980 | 562 | | ((T2 (_,Min adj1,_)) as t1)::xs -> |
34e49164 C |
563 | let (between_comments, rest) = |
564 | Common.span is_minusable_comment_or_plus xs in | |
565 | (match rest with | |
002099fc C |
566 | | [] -> |
567 | t1 :: (List.map (set_minus_comment_or_plus adj1) between_comments) | |
34e49164 | 568 | |
708f4980 C |
569 | | ((T2 (_,Min adj2,_)) as t2)::rest when common_adj adj1 adj2 -> |
570 | t1:: | |
571 | (List.map (set_minus_comment_or_plus adj1) between_comments @ | |
572 | adjust_between_minus (t2::rest)) | |
b1b2de81 | 573 | | x::xs -> |
708f4980 | 574 | t1::(between_comments @ adjust_between_minus (x::xs)) |
34e49164 C |
575 | ) |
576 | ||
577 | | x::xs -> x::adjust_between_minus xs in | |
578 | ||
579 | let xs = adjust_between_minus xs in | |
580 | ||
581 | let xs = xs +> Common.exclude (function | |
708f4980 | 582 | | T2 (t,Min adj,_) -> true |
34e49164 C |
583 | | _ -> false |
584 | ) in | |
585 | xs | |
586 | ||
faf9a90c C |
587 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
588 | let adjust_before_semicolon toks = | |
589 | let toks = List.rev toks in | |
002099fc | 590 | let rec search_semic = function |
faf9a90c | 591 | [] -> [] |
002099fc | 592 | | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs -> |
faf9a90c | 593 | if List.mem (str_of_token2 x) [";";")";","] |
002099fc C |
594 | then x :: search_minus false xs |
595 | else x :: search_semic xs | |
596 | | x::xs -> x :: search_semic xs | |
597 | and search_minus seen_minus xs = | |
598 | let (spaces, rest) = Common.span is_space xs in | |
599 | (* only delete spaces if something is actually deleted *) | |
600 | match rest with | |
601 | ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest | |
602 | | _ -> if seen_minus then rest else xs in | |
603 | List.rev (search_semic toks) | |
34e49164 C |
604 | |
605 | let is_ident_like s = s ==~ Common.regexp_alpha | |
606 | ||
607 | let rec add_space xs = | |
608 | match xs with | |
609 | | [] -> [] | |
610 | | [x] -> [x] | |
708f4980 C |
611 | | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs |
612 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && | |
613 | not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly -> | |
614 | (* this only works within a line. could consider whether | |
615 | something should be done to add newlines too, rather than | |
616 | printing them explicitly in unparse_cocci. *) | |
617 | x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) | |
34e49164 C |
618 | | x::y::xs -> |
619 | let sx = str_of_token2 x in | |
620 | let sy = str_of_token2 y in | |
faf9a90c | 621 | if is_ident_like sx && is_ident_like sy |
34e49164 C |
622 | then x::C2 " "::(add_space (y::xs)) |
623 | else x::(add_space (y::xs)) | |
624 | ||
625 | ||
626 | ||
627 | (* When insert some new code, because of a + in a SP, we must add this | |
628 | * code at the right place, with the good indentation. So each time we | |
629 | * encounter some spacing info, with some newline, we maintain the | |
630 | * current indentation level used. | |
631 | * | |
632 | * TODO problems: not accurate. ex: TODO | |
633 | * | |
634 | * TODO: if in #define region, should add a \ \n | |
635 | *) | |
636 | let new_tabbing2 space = | |
637 | (list_of_string space) | |
638 | +> List.rev | |
b1b2de81 | 639 | +> Common.take_until (fun c -> c =<= '\n') |
34e49164 C |
640 | +> List.rev |
641 | +> List.map string_of_char | |
642 | +> String.concat "" | |
643 | ||
644 | let new_tabbing a = | |
645 | Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) | |
646 | ||
647 | ||
648 | let rec adjust_indentation xs = | |
649 | let _current_tabbing = ref "" in | |
650 | let tabbing_unit = ref None in | |
651 | ||
652 | let string_of_list l = String.concat "" (List.map string_of_char l) in | |
653 | ||
654 | (* try to pick a tabbing unit for the plus code *) | |
655 | let adjust_tabbing_unit old_tab new_tab = | |
b1b2de81 | 656 | if !tabbing_unit =*= None && String.length new_tab > String.length old_tab |
34e49164 C |
657 | then |
658 | let old_tab = list_of_string old_tab in | |
659 | let new_tab = list_of_string new_tab in | |
660 | let rec loop = function | |
661 | ([],new_tab) -> | |
662 | tabbing_unit := Some(string_of_list new_tab,List.rev new_tab) | |
663 | | (_,[]) -> failwith "not possible" | |
664 | | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *) | |
665 | loop (old_tab,new_tab) in | |
666 | ||
667 | let remtab tu current_tab = | |
668 | let current_tab = List.rev(list_of_string current_tab) in | |
669 | let rec loop = function | |
670 | ([],new_tab) -> string_of_list (List.rev new_tab) | |
0708f913 | 671 | | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*) |
b1b2de81 | 672 | | (t::ts,n::ns) when t =<= n -> loop (ts,ns) |
34e49164 C |
673 | | (_,ns) -> (* mismatch; remove what we can *) |
674 | string_of_list (List.rev ns) in | |
675 | loop (tu,current_tab) in | |
676 | ||
677 | let rec find_first_tab started = function | |
678 | [] -> () | |
b1b2de81 | 679 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> |
34e49164 | 680 | find_first_tab true xs |
485bce71 | 681 | (* patch: coccinelle *) |
34e49164 C |
682 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_ |
683 | when started -> | |
684 | let s = str_of_token2 x +> new_tabbing in | |
685 | tabbing_unit := Some (s,List.rev (list_of_string s)) | |
686 | | x::xs -> find_first_tab started xs in | |
687 | find_first_tab false xs; | |
688 | ||
689 | let rec aux started xs = | |
690 | match xs with | |
691 | | [] -> [] | |
485bce71 | 692 | (* patch: coccinelle *) |
34e49164 | 693 | | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _)):: |
708f4980 C |
694 | ((Cocci2 ("{",_,_,_)) as a)::xs |
695 | when started && str_of_token2 x =$= ")" -> | |
34e49164 | 696 | (* to be done for if, etc, but not for a function header *) |
708f4980 | 697 | x::(C2 " ")::a::(aux started xs) |
34e49164 C |
698 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs -> |
699 | let old_tabbing = !_current_tabbing in | |
700 | str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s); | |
701 | (* only trust the indentation after the first { *) | |
702 | (if started then adjust_tabbing_unit old_tabbing !_current_tabbing); | |
703 | let coccis_rest = Common.span all_coccis xs in | |
704 | (match coccis_rest with | |
b1b2de81 | 705 | (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" -> |
34e49164 C |
706 | (* the case where cocci code has been added before a close } *) |
707 | x::aux started (Indent_cocci2::xs) | |
708 | | _ -> x::aux started xs) | |
709 | | Indent_cocci2::xs -> | |
710 | (match !tabbing_unit with | |
711 | None -> aux started xs | |
712 | | Some (tu,_) -> | |
713 | _current_tabbing := (!_current_tabbing)^tu; | |
708f4980 | 714 | Cocci2 (tu,-1,-1,-1)::aux started xs) |
34e49164 C |
715 | | Unindent_cocci2::xs -> |
716 | (match !tabbing_unit with | |
717 | None -> aux started xs | |
718 | | Some (_,tu) -> | |
719 | _current_tabbing := remtab tu (!_current_tabbing); | |
720 | aux started xs) | |
721 | (* border between existing code and cocci code *) | |
708f4980 | 722 | | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs |
b1b2de81 | 723 | when str_of_token2 x =$= "{" -> |
34e49164 C |
724 | x::aux true (y::Indent_cocci2::xs) |
725 | | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs | |
b1b2de81 | 726 | when str_of_token2 y =$= "}" -> |
34e49164 C |
727 | x::aux started (y::Unindent_cocci2::xs) |
728 | (* starting the body of the function *) | |
b1b2de81 | 729 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs |
708f4980 C |
730 | | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs |
731 | | ((Cocci2("\n",_,_,_)) as x)::xs -> | |
0708f913 | 732 | (* dont inline in expr because of weird eval order of ocaml *) |
34e49164 | 733 | let s = !_current_tabbing in |
708f4980 | 734 | x::Cocci2 (s,-1,-1,-1)::aux started xs |
34e49164 C |
735 | | x::xs -> x::aux started xs in |
736 | aux false xs | |
737 | ||
738 | ||
739 | let rec find_paren_comma = function | |
740 | | [] -> () | |
741 | ||
742 | (* do nothing if was like this in original file *) | |
743 | | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
b1b2de81 | 744 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
745 | find_paren_comma (x2::xs) |
746 | ||
747 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
b1b2de81 | 748 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
749 | find_paren_comma (x2::xs) |
750 | ||
751 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2) | |
b1b2de81 | 752 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
753 | find_paren_comma (x2::xs) |
754 | ||
755 | (* otherwise yes can adjust *) | |
756 | | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> | |
757 | x2.remove <- true; | |
758 | find_paren_comma (x2::xs) | |
759 | | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> | |
760 | x1.remove <- true; | |
761 | find_paren_comma (x2::xs) | |
762 | ||
763 | | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> | |
764 | x1.remove <- true; | |
765 | find_paren_comma (x2::xs) | |
766 | ||
767 | | x::xs -> | |
768 | find_paren_comma xs | |
769 | ||
770 | ||
771 | let fix_tokens toks = | |
772 | let toks = toks +> List.map mk_token_extended in | |
773 | ||
774 | let cleaner = toks +> Common.exclude (function | |
775 | | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *) | |
776 | | _ -> false | |
777 | ) in | |
778 | find_paren_comma cleaner; | |
779 | ||
780 | let toks = rebuild_tokens_extented toks in | |
781 | toks +> List.map (fun x -> x.tok2) | |
782 | ||
783 | ||
784 | ||
785 | (*****************************************************************************) | |
786 | (* Final unparsing (and debugging support) *) | |
787 | (*****************************************************************************) | |
788 | ||
789 | (* for debugging *) | |
790 | type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin | |
791 | ||
792 | let kind_of_token2 = function | |
793 | | Fake2 -> KFake | |
794 | | Cocci2 _ -> KCocci | |
795 | | C2 _ -> KC | |
796 | | T2 (t,_,_) -> | |
797 | (match TH.pinfo_of_tok t with | |
798 | | ExpandedTok _ -> KExpanded | |
799 | | OriginTok _ -> KOrigin | |
800 | | FakeTok _ -> raise Impossible (* now a Fake2 *) | |
801 | | AbstractLineTok _ -> raise Impossible (* now a KC *) | |
802 | ) | |
803 | | Unindent_cocci2 | Indent_cocci2 -> raise Impossible | |
804 | ||
805 | let end_mark = "!" | |
806 | ||
807 | let start_mark = function | |
808 | | KFake -> "!F!" | |
809 | | KCocci -> "!S!" | |
810 | | KC -> "!A!" | |
811 | | KExpanded -> "!E!" | |
812 | | KOrigin -> "" | |
813 | ||
814 | let print_all_tokens2 pr xs = | |
815 | if !Flag_parsing_c.debug_unparsing | |
816 | then | |
817 | let current_kind = ref KOrigin in | |
818 | xs +> List.iter (fun t -> | |
819 | let newkind = kind_of_token2 t in | |
b1b2de81 | 820 | if newkind =*= !current_kind |
34e49164 C |
821 | then pr (str_of_token2 t) |
822 | else begin | |
823 | pr (end_mark); | |
824 | pr (start_mark newkind); | |
825 | pr (str_of_token2 t); | |
826 | current_kind := newkind | |
827 | end | |
828 | ); | |
829 | else | |
830 | xs +> List.iter (fun x -> pr (str_of_token2 x)) | |
831 | ||
832 | ||
833 | ||
834 | ||
835 | (*****************************************************************************) | |
836 | (* Entry points *) | |
837 | (*****************************************************************************) | |
838 | ||
839 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
840 | * complete C file, including a modified C file by transformation.ml, | |
841 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
842 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
843 | * fancy stuff when a function was not modified at all. Just need to | |
844 | * print the list of token as-is. But now pretty_print_c.ml handles | |
845 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
846 | * optimize a little the pretty printing. | |
847 | * | |
848 | * update: now have PPviastr which goes even faster than PPviatok, so | |
849 | * PPviatok has disappeared. | |
850 | *) | |
851 | ||
852 | type ppmethod = PPnormal | PPviastr | |
853 | ||
854 | ||
855 | ||
856 | ||
857 | (* The pp_program function will call pretty_print_c.ml with a special | |
858 | * function to print the leaf components, the tokens. When we want to | |
859 | * print a token, we need to print also maybe the space and comments that | |
860 | * were close to it in the original file (and that was omitted during the | |
861 | * parsing phase), and honor what the cocci-info attached to the token says. | |
862 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
863 | * print it and also print some cocci-code attached in a PLUS to it. | |
864 | * So we will also maybe call unparse_cocci. Because the cocci-code may | |
865 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
866 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
867 | *) | |
868 | ||
869 | let pp_program2 xs outfile = | |
870 | Common.with_open_outfile outfile (fun (pr,chan) -> | |
871 | let pr s = | |
872 | if !Flag_parsing_c.debug_unparsing | |
873 | then begin pr2_no_nl s; flush stderr end | |
874 | else pr s | |
875 | (* flush chan; *) | |
876 | (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) | |
877 | in | |
878 | ||
879 | xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> | |
34e49164 C |
880 | (* here can still work on ast *) |
881 | let e = remove_useless_fakeInfo_struct e in | |
882 | ||
883 | match ppmethod with | |
002099fc | 884 | | PPnormal -> |
34e49164 C |
885 | (* now work on tokens *) |
886 | ||
887 | (* phase1: just get all the tokens, all the information *) | |
888 | assert(toks_e +> List.for_all (fun t -> | |
889 | TH.is_origin t or TH.is_expanded t | |
890 | )); | |
891 | let toks = get_fakeInfo_and_tokens e toks_e in | |
892 | let toks = displace_fake_nodes toks in | |
893 | (* assert Origin;ExpandedTok;Faketok *) | |
894 | let toks = expand_mcode toks in | |
895 | (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok) | |
896 | * and no tag information, just NOTHING. *) | |
897 | ||
898 | (* phase2: can now start to filter and adjust *) | |
899 | let toks = adjust_indentation toks in | |
002099fc | 900 | let toks = adjust_before_semicolon toks in (* before remove minus *) |
34e49164 C |
901 | let toks = remove_minus_and_between_and_expanded_and_fake toks in |
902 | (* assert Origin + Cocci + C and no minus *) | |
903 | let toks = add_space toks in | |
904 | let toks = fix_tokens toks in | |
905 | ||
906 | (* in theory here could reparse and rework the ast! or | |
907 | * apply some SP. Not before cos julia may have generated | |
908 | * not parsable file. Need do unparsing_tricks call before being | |
909 | * ready to reparse. *) | |
910 | print_all_tokens2 pr toks; | |
911 | ||
912 | | PPviastr -> pr str | |
913 | ) | |
914 | ) | |
915 | ||
916 | let pp_program a b = | |
917 | Common.profile_code "C unparsing" (fun () -> pp_program2 a b) | |
918 | ||
485bce71 C |
919 | |
920 | let pp_program_default xs outfile = | |
921 | let xs' = xs +> List.map (fun x -> x, PPnormal) in | |
922 | pp_program xs' outfile |