Commit | Line | Data |
---|---|---|
0708f913 C |
1 | (* Yoann Padioleau, Julia Lawall |
2 | * | |
3 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU | |
34e49164 C |
4 | * |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License (GPL) | |
7 | * version 2 as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * file license.txt for more details. | |
485bce71 C |
13 | * |
14 | * | |
15 | * Modifications by Julia Lawall for better newline handling. | |
34e49164 C |
16 | *) |
17 | open Common | |
18 | ||
19 | open Ast_c | |
20 | ||
21 | module TH = Token_helpers | |
22 | ||
23 | ||
b1b2de81 C |
24 | (* should keep comments and directives in between adjacent deleted terms, |
25 | but not comments and directives within deleted terms. should use the | |
26 | labels found in the control-flow graph *) | |
27 | ||
34e49164 C |
28 | |
29 | ||
708f4980 C |
30 | (*****************************************************************************) |
31 | (* Wrappers *) | |
32 | (*****************************************************************************) | |
33 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing | |
34 | ||
34e49164 C |
35 | (*****************************************************************************) |
36 | (* Types used during the intermediate phases of the unparsing *) | |
37 | (*****************************************************************************) | |
38 | ||
39 | type token1 = | |
40 | | Fake1 of info | |
41 | | T1 of Parser_c.token | |
42 | ||
43 | (* The cocci_tag of the token should always be a NOTHING. The mark of | |
44 | * the token can only be OriginTok or ExpandedTok. Why not get rid of | |
45 | * token and get something simpler ? because we need to know if the | |
46 | * info is a TCommentCpp or TCommentSpace, etc for some of the further | |
47 | * analysis so easier to keep with the token. | |
48 | * | |
49 | * This type contains the whole information. Have all the tokens with this | |
50 | * type. | |
51 | *) | |
708f4980 C |
52 | type min = |
53 | Min of (int list (* match numbers *) * int (* adjacency information *)) | |
54 | | Ctx | |
55 | ||
34e49164 | 56 | type token2 = |
708f4980 | 57 | | T2 of Parser_c.token * min * |
34e49164 C |
58 | int option (* orig index, abstracting away comments and space *) |
59 | | Fake2 | |
708f4980 | 60 | | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) |
34e49164 C |
61 | | C2 of string |
62 | | Indent_cocci2 | |
63 | | Unindent_cocci2 | |
64 | ||
65 | (* not used yet *) | |
66 | type token3 = | |
67 | | T3 of Parser_c.token | |
68 | | Cocci3 of string | |
69 | | C3 of string | |
70 | ||
71 | ||
72 | (* similar to the tech in parsing_hack *) | |
73 | type token_extended = { | |
74 | tok2 : token2; | |
75 | str : string; | |
76 | idx: int option; (* to know if 2 tokens were consecutive in orig file *) | |
77 | mutable new_tokens_before : token2 list; | |
78 | mutable remove : bool; | |
79 | } | |
80 | ||
81 | ||
82 | (*****************************************************************************) | |
83 | (* Helpers *) | |
84 | (*****************************************************************************) | |
85 | ||
86 | let info_of_token1 t = | |
87 | match t with | |
88 | | Fake1 info -> info | |
89 | | T1 tok -> TH.info_of_tok tok | |
90 | ||
91 | let str_of_token2 = function | |
92 | | T2 (t,_,_) -> TH.str_of_tok t | |
93 | | Fake2 -> "" | |
708f4980 | 94 | | Cocci2 (s,_,_,_) -> s |
34e49164 C |
95 | | C2 s -> s |
96 | | Indent_cocci2 -> "" | |
97 | | Unindent_cocci2 -> "" | |
98 | ||
99 | let print_token2 = function | |
708f4980 C |
100 | | T2 (t,b,_) -> |
101 | let b_str = | |
102 | match b with | |
103 | Min (index,adj) -> | |
104 | Printf.sprintf "-%d[%s]" adj | |
105 | (String.concat " " (List.map string_of_int index)) | |
106 | | Ctx -> "" in | |
107 | "T2:"^b_str^TH.str_of_tok t | |
34e49164 | 108 | | Fake2 -> "" |
708f4980 | 109 | | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s |
34e49164 C |
110 | | C2 s -> "C2:"^s |
111 | | Indent_cocci2 -> "Indent" | |
112 | | Unindent_cocci2 -> "Unindent" | |
113 | ||
708f4980 | 114 | let simple_print_all_tokens2 l = |
34e49164 C |
115 | List.iter (function x -> Printf.printf "%s " (print_token2 x)) l; |
116 | Printf.printf "\n" | |
117 | ||
118 | let str_of_token3 = function | |
119 | | T3 t -> TH.str_of_tok t | |
120 | | Cocci3 s | C3 s -> s | |
121 | ||
122 | ||
123 | ||
124 | let mk_token_extended x = | |
125 | let origidx = | |
126 | match x with | |
127 | | T2 (_,_, idx) -> idx | |
128 | | _ -> None | |
129 | in | |
130 | { tok2 = x; | |
131 | str = str_of_token2 x; | |
132 | idx = origidx; | |
133 | new_tokens_before = []; | |
134 | remove = false; | |
135 | } | |
136 | ||
137 | let rebuild_tokens_extented toks_ext = | |
138 | let _tokens = ref [] in | |
139 | toks_ext +> List.iter (fun tok -> | |
140 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); | |
141 | if not tok.remove then push2 tok.tok2 _tokens; | |
142 | ); | |
143 | let tokens = List.rev !_tokens in | |
144 | (tokens +> List.map mk_token_extended) | |
145 | ||
146 | ||
147 | let mcode_contain_plus = function | |
148 | | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false | |
149 | | Ast_cocci.CONTEXT _ -> true | |
485bce71 | 150 | (* patch: when need full coccinelle transformation *) |
708f4980 C |
151 | | Ast_cocci.MINUS (_,_,_,[]) -> false |
152 | | Ast_cocci.MINUS (_,_,_,x::xs) -> true | |
34e49164 C |
153 | | Ast_cocci.PLUS -> raise Impossible |
154 | ||
155 | let contain_plus info = | |
156 | let mck = Ast_c.mcode_of_info info in | |
157 | mcode_contain_plus mck | |
158 | ||
159 | (*****************************************************************************) | |
160 | (* Last fix on the ast *) | |
161 | (*****************************************************************************) | |
162 | ||
163 | (* Because of the ugly trick to handle initialiser, I generate fake ',' | |
164 | * for the last initializer element, but if there is nothing around it, | |
165 | * I don't want in the end to print it. | |
166 | *) | |
167 | ||
168 | let remove_useless_fakeInfo_struct program = | |
169 | let bigf = { Visitor_c.default_visitor_c_s with | |
170 | Visitor_c.kini_s = (fun (k,bigf) ini -> | |
171 | match k ini with | |
113803cf | 172 | | InitList args, ii -> |
34e49164 C |
173 | (match ii with |
174 | | [_i1;_i2] -> ini | |
113803cf | 175 | | [i1;i2;iicommaopt] -> |
34e49164 C |
176 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) |
177 | && (Ast_c.is_fake iicommaopt) | |
178 | (* sometimes the guy put a normal iicommaopt *) | |
34e49164 C |
179 | then InitList args, [i1;i2] |
180 | else InitList args, [i1;i2;iicommaopt] | |
113803cf C |
181 | | [i1;i2;iicommaopt;end_comma_opt] -> |
182 | (* only in #define. end_comma_opt canot be fake *) | |
183 | (* not sure if this will be considered ambiguous with a previous | |
184 | case? *) | |
185 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) | |
186 | && (Ast_c.is_fake iicommaopt) | |
187 | (* sometimes the guy put a normal iicommaopt *) | |
188 | then InitList args, [i1;i2;end_comma_opt] | |
189 | else InitList args, [i1;i2;iicommaopt;end_comma_opt] | |
34e49164 C |
190 | | _ -> raise Impossible |
191 | ) | |
192 | | x -> x | |
193 | ) | |
194 | } in | |
195 | Visitor_c.vk_toplevel_s bigf program | |
196 | ||
197 | ||
198 | (*****************************************************************************) | |
199 | (* Tokens1 generation *) | |
200 | (*****************************************************************************) | |
201 | ||
202 | let get_fakeInfo_and_tokens celem toks = | |
203 | let toks_in = ref toks in | |
204 | let toks_out = ref [] in | |
205 | ||
206 | (* todo? verify good order of position ? *) | |
207 | let pr_elem info = | |
208 | match Ast_c.pinfo_of_info info with | |
209 | | FakeTok _ -> | |
210 | Common.push2 (Fake1 info) toks_out | |
211 | | OriginTok _ | ExpandedTok _ -> | |
212 | (* get the associated comments/space/cppcomment tokens *) | |
213 | let (before, x, after) = !toks_in +> Common.split_when (fun tok -> | |
214 | info =*= TH.info_of_tok tok) | |
215 | in | |
b1b2de81 | 216 | assert(info =*= TH.info_of_tok x); |
34e49164 C |
217 | (*old: assert(before +> List.for_all (TH.is_comment)); *) |
218 | before +> List.iter (fun x -> | |
219 | if not (TH.is_comment x) | |
0708f913 | 220 | then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) |
34e49164 C |
221 | (* case such as int asm d3("x"); not yet in ast *) |
222 | ); | |
223 | before +> List.iter (fun x -> Common.push2 (T1 x) toks_out); | |
224 | push2 (T1 x) toks_out; | |
225 | toks_in := after; | |
226 | | AbstractLineTok _ -> | |
227 | (* can be called on type info when for instance use -type_c *) | |
228 | if !Flag_parsing_c.pretty_print_type_info | |
229 | then Common.push2 (Fake1 info) toks_out | |
230 | else raise Impossible (* at this stage *) | |
231 | in | |
232 | ||
233 | let pr_space _ = () in (* use the spacing that is there already *) | |
234 | ||
235 | Pretty_print_c.pp_program_gen pr_elem pr_space celem; | |
236 | ||
237 | if not (null !toks_in) | |
0708f913 | 238 | then failwith "WEIRD: unparsing not finished"; |
34e49164 C |
239 | |
240 | List.rev !toks_out | |
241 | ||
242 | (* Fake nodes that have BEFORE code should be moved over any subsequent | |
243 | whitespace and newlines, but not any comments, to get as close to the affected | |
244 | code as possible. Similarly, face nodes that have AFTER code should be moved | |
245 | backwards. No fake nodes should have both before and after code. *) | |
246 | ||
247 | let displace_fake_nodes toks = | |
248 | let is_fake = function Fake1 _ -> true | _ -> false in | |
249 | let is_whitespace = function | |
250 | T1(Parser_c.TCommentSpace _) | |
485bce71 | 251 | (* patch: cocci *) |
34e49164 C |
252 | | T1(Parser_c.TCommentNewline _) -> true |
253 | | _ -> false in | |
254 | let rec loop toks = | |
255 | let fake_info = | |
256 | try Some (Common.split_when is_fake toks) | |
257 | with Not_found -> None in | |
258 | match fake_info with | |
259 | Some(bef,((Fake1 info) as fake),aft) -> | |
260 | (match !(info.cocci_tag) with | |
708f4980 C |
261 | | Some x -> |
262 | (match x with | |
34e49164 C |
263 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) -> |
264 | (* move the fake node forwards *) | |
265 | let (whitespace,rest) = Common.span is_whitespace aft in | |
266 | bef @ whitespace @ fake :: (loop rest) | |
267 | | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) -> | |
268 | (* move the fake node backwards *) | |
269 | let revbef = List.rev bef in | |
270 | let (revwhitespace,revprev) = Common.span is_whitespace revbef in | |
271 | let whitespace = List.rev revwhitespace in | |
272 | let prev = List.rev revprev in | |
273 | prev @ fake :: (loop (whitespace @ aft)) | |
274 | | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) -> | |
275 | bef @ fake :: (loop aft) | |
276 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) -> | |
277 | failwith "fake node should not be before-after" | |
485bce71 C |
278 | | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) |
279 | ) | |
708f4980 C |
280 | | None -> |
281 | bef @ fake :: (loop aft) | |
282 | ) | |
34e49164 C |
283 | | None -> toks |
284 | | _ -> raise Impossible in | |
285 | loop toks | |
286 | ||
287 | (*****************************************************************************) | |
288 | (* Tokens2 generation *) | |
289 | (*****************************************************************************) | |
290 | ||
b1b2de81 C |
291 | let comment2t2 = function |
292 | (Token_c.TCommentCpp x,(info : Token_c.info)) -> | |
293 | C2("\n"^info.Common.str^"\n") | |
294 | | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x)) | |
295 | ||
34e49164 C |
296 | let expand_mcode toks = |
297 | let toks_out = ref [] in | |
298 | ||
299 | let index = ref 0 in | |
300 | ||
301 | let add_elem t minus = | |
302 | match t with | |
303 | | Fake1 info -> | |
304 | let str = Ast_c.str_of_info info in | |
b1b2de81 | 305 | if str =$= "" |
34e49164 C |
306 | then push2 (Fake2) toks_out |
307 | (* perhaps the fake ',' *) | |
308 | else push2 (C2 str) toks_out | |
309 | ||
310 | ||
311 | | T1 tok -> | |
b1b2de81 | 312 | (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) |
34e49164 | 313 | (* no tag on expandedTok ! *) |
0708f913 C |
314 | (if (TH.is_expanded tok && |
315 | !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot) | |
316 | then | |
317 | failwith | |
318 | (Printf.sprintf | |
319 | "expanded token %s on line %d is either modified or stored in a metavariable" | |
320 | (TH.str_of_tok tok) (TH.line_of_tok tok))); | |
34e49164 C |
321 | |
322 | let tok' = tok +> TH.visitor_info_of_tok (fun i -> | |
323 | { i with cocci_tag = ref Ast_c.emptyAnnot; } | |
324 | ) in | |
325 | ||
326 | let optindex = | |
327 | if TH.is_origin tok && not (TH.is_real_comment tok) | |
328 | then begin | |
329 | incr index; | |
330 | Some !index | |
331 | end | |
332 | else None | |
333 | in | |
334 | ||
335 | push2 (T2 (tok', minus, optindex)) toks_out | |
336 | in | |
337 | ||
338 | let expand_info t = | |
708f4980 C |
339 | let (mcode,env) = |
340 | Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in | |
34e49164 | 341 | |
708f4980 C |
342 | let pr_cocci s ln col rcol = |
343 | push2 (Cocci2(s,ln,col,rcol)) toks_out in | |
34e49164 | 344 | let pr_c info = |
b1b2de81 C |
345 | (match Ast_c.pinfo_of_info info with |
346 | Ast_c.AbstractLineTok _ -> | |
347 | push2 (C2 (Ast_c.str_of_info info)) toks_out | |
348 | | Ast_c.FakeTok (s,_) -> | |
349 | push2 (C2 s) toks_out | |
34e49164 C |
350 | | _ -> |
351 | Printf.printf "line: %s\n" (Common.dump info); | |
b1b2de81 C |
352 | failwith "not an abstract line"); |
353 | (!(info.Ast_c.comments_tag)).Ast_c.mafter +> | |
354 | List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in | |
355 | ||
708f4980 C |
356 | let pr_barrier ln col = (* marks a position, used around C code *) |
357 | push2 (Cocci2("",ln,col,col)) toks_out in | |
358 | let pr_nobarrier ln col = () in (* not needed for linux spacing *) | |
b1b2de81 | 359 | |
708f4980 | 360 | let pr_cspace _ = push2 (C2 " ") toks_out in |
34e49164 | 361 | |
708f4980 C |
362 | let pr_space _ = () (* rely on add_space in cocci code *) in |
363 | let pr_arity _ = () (* not interested *) in | |
34e49164 C |
364 | |
365 | let indent _ = push2 Indent_cocci2 toks_out in | |
366 | let unindent _ = push2 Unindent_cocci2 toks_out in | |
367 | ||
708f4980 C |
368 | let args_pp = |
369 | (env, pr_cocci, pr_c, pr_cspace, | |
370 | (match !Flag_parsing_c.spacing with | |
371 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace), | |
372 | pr_arity, | |
373 | (match !Flag_parsing_c.spacing with | |
374 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), | |
375 | indent, unindent) in | |
34e49164 | 376 | |
485bce71 C |
377 | (* old: when for yacfe with partial cocci: |
378 | * add_elem t false; | |
379 | *) | |
34e49164 | 380 | |
485bce71 | 381 | (* patch: when need full coccinelle transformation *) |
faf9a90c | 382 | let unparser = Unparse_cocci.pp_list_list_any args_pp false in |
34e49164 | 383 | match mcode with |
708f4980 | 384 | | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> |
34e49164 C |
385 | (* Why adding ? because I want to have all the information, the whole |
386 | * set of tokens, so I can then process and remove the | |
387 | * is_between_two_minus for instance *) | |
708f4980 | 388 | add_elem t (Min (inst,adj)); |
faf9a90c | 389 | unparser any_xxs Unparse_cocci.InPlace |
34e49164 C |
390 | | Ast_cocci.CONTEXT (_,any_befaft) -> |
391 | (match any_befaft with | |
392 | | Ast_cocci.NOTHING -> | |
708f4980 | 393 | add_elem t Ctx |
34e49164 | 394 | | Ast_cocci.BEFORE xxs -> |
faf9a90c | 395 | unparser xxs Unparse_cocci.Before; |
708f4980 | 396 | add_elem t Ctx |
34e49164 | 397 | | Ast_cocci.AFTER xxs -> |
708f4980 | 398 | add_elem t Ctx; |
faf9a90c | 399 | unparser xxs Unparse_cocci.After; |
34e49164 | 400 | | Ast_cocci.BEFOREAFTER (xxs, yys) -> |
faf9a90c | 401 | unparser xxs Unparse_cocci.Before; |
708f4980 | 402 | add_elem t Ctx; |
faf9a90c | 403 | unparser yys Unparse_cocci.After; |
34e49164 C |
404 | ) |
405 | | Ast_cocci.PLUS -> raise Impossible | |
406 | ||
407 | in | |
408 | ||
409 | toks +> List.iter expand_info; | |
410 | List.rev !toks_out | |
411 | ||
412 | ||
413 | (*****************************************************************************) | |
414 | (* Tokens2 processing, filtering, adjusting *) | |
415 | (*****************************************************************************) | |
416 | ||
417 | let is_minusable_comment = function | |
b1b2de81 | 418 | | (T2 (t,_b,_i)) -> |
34e49164 C |
419 | (match t with |
420 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
485bce71 | 421 | (* patch: coccinelle *) |
34e49164 C |
422 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) |
423 | | Parser_c.TComment _ | |
0708f913 | 424 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) |
b1b2de81 C |
425 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
426 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *) | |
34e49164 C |
427 | -> true |
428 | ||
b1b2de81 | 429 | | Parser_c.TCommentMisc _ |
0708f913 | 430 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) |
34e49164 C |
431 | -> false |
432 | ||
433 | | _ -> false | |
434 | ) | |
435 | | _ -> false | |
436 | ||
437 | let all_coccis = function | |
438 | Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true | |
439 | | _ -> false | |
440 | ||
b1b2de81 C |
441 | (*previously gave up if the first character was a newline, but not clear why*) |
442 | let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x | |
34e49164 | 443 | |
708f4980 C |
444 | let set_minus_comment adj = function |
445 | | T2 (t,Ctx,idx) -> | |
34e49164 C |
446 | let str = TH.str_of_tok t in |
447 | (match t with | |
448 | | Parser_c.TCommentSpace _ | |
485bce71 | 449 | (* patch: coccinelle *) |
34e49164 C |
450 | | Parser_c.TCommentNewline _ -> () |
451 | ||
452 | | Parser_c.TComment _ | |
0708f913 | 453 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) |
b1b2de81 C |
454 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
455 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
34e49164 | 456 | -> |
b1b2de81 C |
457 | pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" |
458 | (TH.line_of_tok t) str) | |
34e49164 C |
459 | | _ -> raise Impossible |
460 | ); | |
708f4980 | 461 | T2 (t, Min adj, idx) |
485bce71 | 462 | (* patch: coccinelle *) |
708f4980 | 463 | | T2 (Parser_c.TCommentNewline _,Min adj,idx) as x -> x |
34e49164 C |
464 | | _ -> raise Impossible |
465 | ||
708f4980 | 466 | let set_minus_comment_or_plus adj = function |
34e49164 | 467 | Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x |
708f4980 | 468 | | x -> set_minus_comment adj x |
34e49164 C |
469 | |
470 | let remove_minus_and_between_and_expanded_and_fake xs = | |
471 | ||
472 | (* get rid of exampled and fake tok *) | |
473 | let xs = xs +> Common.exclude (function | |
474 | | T2 (t,_,_) when TH.is_expanded t -> true | |
475 | | Fake2 -> true | |
476 | ||
477 | | _ -> false | |
478 | ) | |
479 | in | |
480 | ||
481 | (*This drops the space before each completely minused block (no plus code).*) | |
113803cf | 482 | let minus_or_comment = function |
708f4980 | 483 | T2(_,Min adj,_) -> true |
113803cf C |
484 | | T2(Parser_c.TCommentNewline _,_b,_i) -> false |
485 | | x -> is_minusable_comment x in | |
486 | ||
34e49164 C |
487 | let rec adjust_before_minus = function |
488 | [] -> [] | |
485bce71 | 489 | (* patch: coccinelle *) |
708f4980 C |
490 | | (T2(Parser_c.TCommentNewline c,_b,_i) as x):: |
491 | ((T2(_,Min adj,_)::_) as xs) -> | |
34e49164 C |
492 | let (between_minus,rest) = Common.span minus_or_comment xs in |
493 | (match rest with | |
708f4980 | 494 | [] -> (set_minus_comment adj x) :: between_minus |
34e49164 | 495 | | T2(Parser_c.TCommentNewline _,_b,_i)::_ -> |
708f4980 | 496 | (set_minus_comment adj x) :: between_minus @ |
34e49164 C |
497 | (adjust_before_minus rest) |
498 | | _ -> x :: between_minus @ (adjust_before_minus rest)) | |
499 | | x::xs -> x::adjust_before_minus xs in | |
500 | ||
501 | let xs = adjust_before_minus xs in | |
502 | ||
113803cf C |
503 | (* this drops blank lines after a brace introduced by removing code *) |
504 | let rec adjust_after_brace = function | |
505 | [] -> [] | |
708f4980 | 506 | | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs) |
b1b2de81 | 507 | when str_of_token2 x =$= "{" -> |
113803cf C |
508 | let (between_minus,rest) = Common.span minus_or_comment xs in |
509 | let is_whitespace = function | |
510 | T2(Parser_c.TCommentSpace _,_b,_i) | |
511 | (* patch: cocci *) | |
512 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
513 | | _ -> false in | |
514 | let (newlines,rest) = Common.span is_whitespace rest in | |
515 | let (drop_newlines,last_newline) = | |
516 | let rec loop = function | |
517 | [] -> ([],[]) | |
518 | | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest -> | |
519 | (List.rev rest,[x]) | |
520 | | x::xs -> | |
521 | let (drop_newlines,last_newline) = loop xs in | |
522 | (drop_newlines,x::last_newline) in | |
523 | loop (List.rev newlines) in | |
708f4980 | 524 | x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@ |
113803cf C |
525 | last_newline@ |
526 | adjust_after_brace rest | |
527 | | x::xs -> x::adjust_after_brace xs in | |
528 | ||
529 | let xs = adjust_after_brace xs in | |
530 | ||
34e49164 C |
531 | (* this deals with any stuff that is between the minused code, eg |
532 | spaces, comments, attributes, etc. *) | |
533 | (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus | |
534 | is because the + code can end up anywhere in the middle of the - code; | |
535 | it is not necessarily to the far left *) | |
708f4980 C |
536 | |
537 | let common_adj (index1,adj1) (index2,adj2) = | |
538 | adj1 = adj2 (* same adjacency info *) && | |
539 | (* non-empty intersection of witness trees *) | |
540 | not ((Common.inter_set index1 index2) = []) in | |
541 | ||
b1b2de81 | 542 | let rec adjust_between_minus xs = |
34e49164 C |
543 | match xs with |
544 | | [] -> [] | |
708f4980 | 545 | | ((T2 (_,Min adj1,_)) as t1)::xs -> |
34e49164 C |
546 | let (between_comments, rest) = |
547 | Common.span is_minusable_comment_or_plus xs in | |
548 | (match rest with | |
708f4980 | 549 | | [] -> [t1] |
34e49164 | 550 | |
708f4980 C |
551 | | ((T2 (_,Min adj2,_)) as t2)::rest when common_adj adj1 adj2 -> |
552 | t1:: | |
553 | (List.map (set_minus_comment_or_plus adj1) between_comments @ | |
554 | adjust_between_minus (t2::rest)) | |
b1b2de81 | 555 | | x::xs -> |
708f4980 | 556 | t1::(between_comments @ adjust_between_minus (x::xs)) |
34e49164 C |
557 | ) |
558 | ||
559 | | x::xs -> x::adjust_between_minus xs in | |
560 | ||
561 | let xs = adjust_between_minus xs in | |
562 | ||
563 | let xs = xs +> Common.exclude (function | |
708f4980 | 564 | | T2 (t,Min adj,_) -> true |
34e49164 C |
565 | | _ -> false |
566 | ) in | |
567 | xs | |
568 | ||
faf9a90c C |
569 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
570 | let adjust_before_semicolon toks = | |
571 | let toks = List.rev toks in | |
572 | let rec loop = function | |
573 | [] -> [] | |
708f4980 | 574 | | ((T2(_,Ctx,_)) as x)::xs -> |
faf9a90c C |
575 | if List.mem (str_of_token2 x) [";";")";","] |
576 | then | |
577 | let (spaces, rest) = Common.span is_minusable_comment xs in | |
578 | (match rest with | |
708f4980 | 579 | (T2(_,Min _,_))::_ | (Cocci2 _)::_ -> |
faf9a90c C |
580 | (* only drop spaces if something was actually changed before *) |
581 | x :: loop rest | |
582 | | _ -> x :: loop xs) | |
583 | else x :: loop xs | |
584 | | x::xs -> x :: loop xs in | |
585 | List.rev (loop toks) | |
34e49164 C |
586 | |
587 | let is_ident_like s = s ==~ Common.regexp_alpha | |
588 | ||
589 | let rec add_space xs = | |
590 | match xs with | |
591 | | [] -> [] | |
592 | | [x] -> [x] | |
708f4980 C |
593 | | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs |
594 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && | |
595 | not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly -> | |
596 | (* this only works within a line. could consider whether | |
597 | something should be done to add newlines too, rather than | |
598 | printing them explicitly in unparse_cocci. *) | |
599 | x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) | |
34e49164 C |
600 | | x::y::xs -> |
601 | let sx = str_of_token2 x in | |
602 | let sy = str_of_token2 y in | |
faf9a90c | 603 | if is_ident_like sx && is_ident_like sy |
34e49164 C |
604 | then x::C2 " "::(add_space (y::xs)) |
605 | else x::(add_space (y::xs)) | |
606 | ||
607 | ||
608 | ||
609 | (* When insert some new code, because of a + in a SP, we must add this | |
610 | * code at the right place, with the good indentation. So each time we | |
611 | * encounter some spacing info, with some newline, we maintain the | |
612 | * current indentation level used. | |
613 | * | |
614 | * TODO problems: not accurate. ex: TODO | |
615 | * | |
616 | * TODO: if in #define region, should add a \ \n | |
617 | *) | |
618 | let new_tabbing2 space = | |
619 | (list_of_string space) | |
620 | +> List.rev | |
b1b2de81 | 621 | +> Common.take_until (fun c -> c =<= '\n') |
34e49164 C |
622 | +> List.rev |
623 | +> List.map string_of_char | |
624 | +> String.concat "" | |
625 | ||
626 | let new_tabbing a = | |
627 | Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) | |
628 | ||
629 | ||
630 | let rec adjust_indentation xs = | |
631 | let _current_tabbing = ref "" in | |
632 | let tabbing_unit = ref None in | |
633 | ||
634 | let string_of_list l = String.concat "" (List.map string_of_char l) in | |
635 | ||
636 | (* try to pick a tabbing unit for the plus code *) | |
637 | let adjust_tabbing_unit old_tab new_tab = | |
b1b2de81 | 638 | if !tabbing_unit =*= None && String.length new_tab > String.length old_tab |
34e49164 C |
639 | then |
640 | let old_tab = list_of_string old_tab in | |
641 | let new_tab = list_of_string new_tab in | |
642 | let rec loop = function | |
643 | ([],new_tab) -> | |
644 | tabbing_unit := Some(string_of_list new_tab,List.rev new_tab) | |
645 | | (_,[]) -> failwith "not possible" | |
646 | | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *) | |
647 | loop (old_tab,new_tab) in | |
648 | ||
649 | let remtab tu current_tab = | |
650 | let current_tab = List.rev(list_of_string current_tab) in | |
651 | let rec loop = function | |
652 | ([],new_tab) -> string_of_list (List.rev new_tab) | |
0708f913 | 653 | | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*) |
b1b2de81 | 654 | | (t::ts,n::ns) when t =<= n -> loop (ts,ns) |
34e49164 C |
655 | | (_,ns) -> (* mismatch; remove what we can *) |
656 | string_of_list (List.rev ns) in | |
657 | loop (tu,current_tab) in | |
658 | ||
659 | let rec find_first_tab started = function | |
660 | [] -> () | |
b1b2de81 | 661 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> |
34e49164 | 662 | find_first_tab true xs |
485bce71 | 663 | (* patch: coccinelle *) |
34e49164 C |
664 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_ |
665 | when started -> | |
666 | let s = str_of_token2 x +> new_tabbing in | |
667 | tabbing_unit := Some (s,List.rev (list_of_string s)) | |
668 | | x::xs -> find_first_tab started xs in | |
669 | find_first_tab false xs; | |
670 | ||
671 | let rec aux started xs = | |
672 | match xs with | |
673 | | [] -> [] | |
485bce71 | 674 | (* patch: coccinelle *) |
34e49164 | 675 | | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _)):: |
708f4980 C |
676 | ((Cocci2 ("{",_,_,_)) as a)::xs |
677 | when started && str_of_token2 x =$= ")" -> | |
34e49164 | 678 | (* to be done for if, etc, but not for a function header *) |
708f4980 | 679 | x::(C2 " ")::a::(aux started xs) |
34e49164 C |
680 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs -> |
681 | let old_tabbing = !_current_tabbing in | |
682 | str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s); | |
683 | (* only trust the indentation after the first { *) | |
684 | (if started then adjust_tabbing_unit old_tabbing !_current_tabbing); | |
685 | let coccis_rest = Common.span all_coccis xs in | |
686 | (match coccis_rest with | |
b1b2de81 | 687 | (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" -> |
34e49164 C |
688 | (* the case where cocci code has been added before a close } *) |
689 | x::aux started (Indent_cocci2::xs) | |
690 | | _ -> x::aux started xs) | |
691 | | Indent_cocci2::xs -> | |
692 | (match !tabbing_unit with | |
693 | None -> aux started xs | |
694 | | Some (tu,_) -> | |
695 | _current_tabbing := (!_current_tabbing)^tu; | |
708f4980 | 696 | Cocci2 (tu,-1,-1,-1)::aux started xs) |
34e49164 C |
697 | | Unindent_cocci2::xs -> |
698 | (match !tabbing_unit with | |
699 | None -> aux started xs | |
700 | | Some (_,tu) -> | |
701 | _current_tabbing := remtab tu (!_current_tabbing); | |
702 | aux started xs) | |
703 | (* border between existing code and cocci code *) | |
708f4980 | 704 | | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs |
b1b2de81 | 705 | when str_of_token2 x =$= "{" -> |
34e49164 C |
706 | x::aux true (y::Indent_cocci2::xs) |
707 | | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs | |
b1b2de81 | 708 | when str_of_token2 y =$= "}" -> |
34e49164 C |
709 | x::aux started (y::Unindent_cocci2::xs) |
710 | (* starting the body of the function *) | |
b1b2de81 | 711 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs |
708f4980 C |
712 | | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs |
713 | | ((Cocci2("\n",_,_,_)) as x)::xs -> | |
0708f913 | 714 | (* dont inline in expr because of weird eval order of ocaml *) |
34e49164 | 715 | let s = !_current_tabbing in |
708f4980 | 716 | x::Cocci2 (s,-1,-1,-1)::aux started xs |
34e49164 C |
717 | | x::xs -> x::aux started xs in |
718 | aux false xs | |
719 | ||
720 | ||
721 | let rec find_paren_comma = function | |
722 | | [] -> () | |
723 | ||
724 | (* do nothing if was like this in original file *) | |
725 | | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
b1b2de81 | 726 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
727 | find_paren_comma (x2::xs) |
728 | ||
729 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
b1b2de81 | 730 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
731 | find_paren_comma (x2::xs) |
732 | ||
733 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2) | |
b1b2de81 | 734 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
735 | find_paren_comma (x2::xs) |
736 | ||
737 | (* otherwise yes can adjust *) | |
738 | | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> | |
739 | x2.remove <- true; | |
740 | find_paren_comma (x2::xs) | |
741 | | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> | |
742 | x1.remove <- true; | |
743 | find_paren_comma (x2::xs) | |
744 | ||
745 | | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> | |
746 | x1.remove <- true; | |
747 | find_paren_comma (x2::xs) | |
748 | ||
749 | | x::xs -> | |
750 | find_paren_comma xs | |
751 | ||
752 | ||
753 | let fix_tokens toks = | |
754 | let toks = toks +> List.map mk_token_extended in | |
755 | ||
756 | let cleaner = toks +> Common.exclude (function | |
757 | | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *) | |
758 | | _ -> false | |
759 | ) in | |
760 | find_paren_comma cleaner; | |
761 | ||
762 | let toks = rebuild_tokens_extented toks in | |
763 | toks +> List.map (fun x -> x.tok2) | |
764 | ||
765 | ||
766 | ||
767 | (*****************************************************************************) | |
768 | (* Final unparsing (and debugging support) *) | |
769 | (*****************************************************************************) | |
770 | ||
771 | (* for debugging *) | |
772 | type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin | |
773 | ||
774 | let kind_of_token2 = function | |
775 | | Fake2 -> KFake | |
776 | | Cocci2 _ -> KCocci | |
777 | | C2 _ -> KC | |
778 | | T2 (t,_,_) -> | |
779 | (match TH.pinfo_of_tok t with | |
780 | | ExpandedTok _ -> KExpanded | |
781 | | OriginTok _ -> KOrigin | |
782 | | FakeTok _ -> raise Impossible (* now a Fake2 *) | |
783 | | AbstractLineTok _ -> raise Impossible (* now a KC *) | |
784 | ) | |
785 | | Unindent_cocci2 | Indent_cocci2 -> raise Impossible | |
786 | ||
787 | let end_mark = "!" | |
788 | ||
789 | let start_mark = function | |
790 | | KFake -> "!F!" | |
791 | | KCocci -> "!S!" | |
792 | | KC -> "!A!" | |
793 | | KExpanded -> "!E!" | |
794 | | KOrigin -> "" | |
795 | ||
796 | let print_all_tokens2 pr xs = | |
797 | if !Flag_parsing_c.debug_unparsing | |
798 | then | |
799 | let current_kind = ref KOrigin in | |
800 | xs +> List.iter (fun t -> | |
801 | let newkind = kind_of_token2 t in | |
b1b2de81 | 802 | if newkind =*= !current_kind |
34e49164 C |
803 | then pr (str_of_token2 t) |
804 | else begin | |
805 | pr (end_mark); | |
806 | pr (start_mark newkind); | |
807 | pr (str_of_token2 t); | |
808 | current_kind := newkind | |
809 | end | |
810 | ); | |
811 | else | |
812 | xs +> List.iter (fun x -> pr (str_of_token2 x)) | |
813 | ||
814 | ||
815 | ||
816 | ||
817 | (*****************************************************************************) | |
818 | (* Entry points *) | |
819 | (*****************************************************************************) | |
820 | ||
821 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
822 | * complete C file, including a modified C file by transformation.ml, | |
823 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
824 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
825 | * fancy stuff when a function was not modified at all. Just need to | |
826 | * print the list of token as-is. But now pretty_print_c.ml handles | |
827 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
828 | * optimize a little the pretty printing. | |
829 | * | |
830 | * update: now have PPviastr which goes even faster than PPviatok, so | |
831 | * PPviatok has disappeared. | |
832 | *) | |
833 | ||
834 | type ppmethod = PPnormal | PPviastr | |
835 | ||
836 | ||
837 | ||
838 | ||
839 | (* The pp_program function will call pretty_print_c.ml with a special | |
840 | * function to print the leaf components, the tokens. When we want to | |
841 | * print a token, we need to print also maybe the space and comments that | |
842 | * were close to it in the original file (and that was omitted during the | |
843 | * parsing phase), and honor what the cocci-info attached to the token says. | |
844 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
845 | * print it and also print some cocci-code attached in a PLUS to it. | |
846 | * So we will also maybe call unparse_cocci. Because the cocci-code may | |
847 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
848 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
849 | *) | |
850 | ||
851 | let pp_program2 xs outfile = | |
852 | Common.with_open_outfile outfile (fun (pr,chan) -> | |
853 | let pr s = | |
854 | if !Flag_parsing_c.debug_unparsing | |
855 | then begin pr2_no_nl s; flush stderr end | |
856 | else pr s | |
857 | (* flush chan; *) | |
858 | (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) | |
859 | in | |
860 | ||
861 | xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> | |
862 | ||
863 | (* here can still work on ast *) | |
864 | let e = remove_useless_fakeInfo_struct e in | |
865 | ||
866 | match ppmethod with | |
867 | | PPnormal -> | |
868 | (* now work on tokens *) | |
869 | ||
870 | (* phase1: just get all the tokens, all the information *) | |
871 | assert(toks_e +> List.for_all (fun t -> | |
872 | TH.is_origin t or TH.is_expanded t | |
873 | )); | |
874 | let toks = get_fakeInfo_and_tokens e toks_e in | |
875 | let toks = displace_fake_nodes toks in | |
876 | (* assert Origin;ExpandedTok;Faketok *) | |
877 | let toks = expand_mcode toks in | |
878 | (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok) | |
879 | * and no tag information, just NOTHING. *) | |
880 | ||
881 | (* phase2: can now start to filter and adjust *) | |
882 | let toks = adjust_indentation toks in | |
883 | let toks = remove_minus_and_between_and_expanded_and_fake toks in | |
faf9a90c | 884 | let toks = adjust_before_semicolon toks in |
34e49164 C |
885 | (* assert Origin + Cocci + C and no minus *) |
886 | let toks = add_space toks in | |
887 | let toks = fix_tokens toks in | |
888 | ||
889 | (* in theory here could reparse and rework the ast! or | |
890 | * apply some SP. Not before cos julia may have generated | |
891 | * not parsable file. Need do unparsing_tricks call before being | |
892 | * ready to reparse. *) | |
893 | print_all_tokens2 pr toks; | |
894 | ||
895 | | PPviastr -> pr str | |
896 | ) | |
897 | ) | |
898 | ||
899 | let pp_program a b = | |
900 | Common.profile_code "C unparsing" (fun () -> pp_program2 a b) | |
901 | ||
485bce71 C |
902 | |
903 | let pp_program_default xs outfile = | |
904 | let xs' = xs +> List.map (fun x -> x, PPnormal) in | |
905 | pp_program xs' outfile |