Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau, Julia Lawall |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
ae4735db C |
14 | * |
15 | * | |
485bce71 | 16 | * Modifications by Julia Lawall for better newline handling. |
34e49164 C |
17 | *) |
18 | open Common | |
19 | ||
20 | open Ast_c | |
21 | ||
22 | module TH = Token_helpers | |
23 | ||
24 | ||
b1b2de81 C |
25 | (* should keep comments and directives in between adjacent deleted terms, |
26 | but not comments and directives within deleted terms. should use the | |
27 | labels found in the control-flow graph *) | |
28 | ||
34e49164 C |
29 | |
30 | ||
708f4980 C |
31 | (*****************************************************************************) |
32 | (* Wrappers *) | |
33 | (*****************************************************************************) | |
34 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing | |
35 | ||
34e49164 C |
36 | (*****************************************************************************) |
37 | (* Types used during the intermediate phases of the unparsing *) | |
38 | (*****************************************************************************) | |
39 | ||
ae4735db | 40 | type token1 = |
34e49164 C |
41 | | Fake1 of info |
42 | | T1 of Parser_c.token | |
43 | ||
44 | (* The cocci_tag of the token should always be a NOTHING. The mark of | |
45 | * the token can only be OriginTok or ExpandedTok. Why not get rid of | |
46 | * token and get something simpler ? because we need to know if the | |
47 | * info is a TCommentCpp or TCommentSpace, etc for some of the further | |
48 | * analysis so easier to keep with the token. | |
ae4735db | 49 | * |
34e49164 C |
50 | * This type contains the whole information. Have all the tokens with this |
51 | * type. | |
52 | *) | |
708f4980 | 53 | type min = |
5626f154 | 54 | Min of (int list (* match numbers from witness trees *) * |
8babbc8f | 55 | Ast_cocci.adjacency (* adjacency information *)) |
708f4980 C |
56 | | Ctx |
57 | ||
ae4735db C |
58 | type token2 = |
59 | | T2 of Parser_c.token * min * | |
34e49164 | 60 | int option (* orig index, abstracting away comments and space *) |
8babbc8f | 61 | | Fake2 of min |
708f4980 | 62 | | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) |
5636bb2c | 63 | * Unparse_cocci.nlhint option |
34e49164 | 64 | | C2 of string |
c491d8ee | 65 | | Comma of string |
34e49164 | 66 | | Indent_cocci2 |
c3e37e97 | 67 | | Unindent_cocci2 of bool (* true for permanent, false for temporary *) |
34e49164 C |
68 | |
69 | (* not used yet *) | |
ae4735db | 70 | type token3 = |
34e49164 C |
71 | | T3 of Parser_c.token |
72 | | Cocci3 of string | |
73 | | C3 of string | |
74 | ||
75 | ||
76 | (* similar to the tech in parsing_hack *) | |
77 | type token_extended = { | |
78 | tok2 : token2; | |
79 | str : string; | |
80 | idx: int option; (* to know if 2 tokens were consecutive in orig file *) | |
81 | mutable new_tokens_before : token2 list; | |
82 | mutable remove : bool; | |
83 | } | |
84 | ||
85 | ||
86 | (*****************************************************************************) | |
87 | (* Helpers *) | |
88 | (*****************************************************************************) | |
89 | ||
ae4735db | 90 | let info_of_token1 t = |
34e49164 C |
91 | match t with |
92 | | Fake1 info -> info | |
93 | | T1 tok -> TH.info_of_tok tok | |
94 | ||
002099fc C |
95 | let print_token1 = function |
96 | T1 tok -> TH.str_of_tok tok | |
97 | | Fake1 info -> "fake" | |
98 | ||
34e49164 C |
99 | let str_of_token2 = function |
100 | | T2 (t,_,_) -> TH.str_of_tok t | |
8babbc8f | 101 | | Fake2 _ -> "" |
5636bb2c | 102 | | Cocci2 (s,_,_,_,_) -> s |
34e49164 | 103 | | C2 s -> s |
c491d8ee | 104 | | Comma s -> s |
34e49164 | 105 | | Indent_cocci2 -> "" |
c3e37e97 | 106 | | Unindent_cocci2 _ -> "" |
34e49164 C |
107 | |
108 | let print_token2 = function | |
708f4980 | 109 | | T2 (t,b,_) -> |
8babbc8f C |
110 | let t_str = |
111 | match t with | |
112 | | Parser_c.TCommentSpace _ -> " sp " | |
113 | | Parser_c.TCommentNewline _ -> " nl " | |
114 | | Parser_c.TCommentCpp _ -> " cp " | |
115 | | Parser_c.TCommentMisc _ -> " misc " | |
116 | | Parser_c.TComment _ -> " comment " | |
117 | | _ -> "" in | |
708f4980 C |
118 | let b_str = |
119 | match b with | |
120 | Min (index,adj) -> | |
8babbc8f C |
121 | Printf.sprintf "-%d[%s]" |
122 | (match adj with Ast_cocci.ADJ n -> n | _ -> -1) | |
708f4980 C |
123 | (String.concat " " (List.map string_of_int index)) |
124 | | Ctx -> "" in | |
8babbc8f C |
125 | "T2:"^b_str^t_str^TH.str_of_tok t |
126 | | Fake2 b -> | |
127 | let b_str = | |
128 | match b with | |
129 | Min (index,adj) -> | |
130 | Printf.sprintf "-%d[%s]" | |
131 | (match adj with Ast_cocci.ADJ n -> n | _ -> -1) | |
132 | (String.concat " " (List.map string_of_int index)) | |
133 | | Ctx -> "" in | |
134 | b_str^"fake" | |
5636bb2c | 135 | | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s |
34e49164 | 136 | | C2 s -> "C2:"^s |
c491d8ee | 137 | | Comma s -> "Comma:"^s |
34e49164 | 138 | | Indent_cocci2 -> "Indent" |
c3e37e97 | 139 | | Unindent_cocci2 _ -> "Unindent" |
34e49164 | 140 | |
002099fc | 141 | let simple_print_all_tokens1 l = |
90aeb998 | 142 | List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l; |
002099fc C |
143 | Printf.printf "\n" |
144 | ||
708f4980 | 145 | let simple_print_all_tokens2 l = |
fc1ad971 | 146 | List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l; |
34e49164 C |
147 | Printf.printf "\n" |
148 | ||
149 | let str_of_token3 = function | |
150 | | T3 t -> TH.str_of_tok t | |
151 | | Cocci3 s | C3 s -> s | |
152 | ||
153 | ||
154 | ||
ae4735db C |
155 | let mk_token_extended x = |
156 | let origidx = | |
34e49164 | 157 | match x with |
ae4735db | 158 | | T2 (_,_, idx) -> idx |
34e49164 C |
159 | | _ -> None |
160 | in | |
ae4735db | 161 | { tok2 = x; |
34e49164 C |
162 | str = str_of_token2 x; |
163 | idx = origidx; | |
164 | new_tokens_before = []; | |
165 | remove = false; | |
166 | } | |
167 | ||
ae4735db | 168 | let rebuild_tokens_extented toks_ext = |
34e49164 | 169 | let _tokens = ref [] in |
ae4735db | 170 | toks_ext +> List.iter (fun tok -> |
34e49164 C |
171 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); |
172 | if not tok.remove then push2 tok.tok2 _tokens; | |
173 | ); | |
174 | let tokens = List.rev !_tokens in | |
175 | (tokens +> List.map mk_token_extended) | |
176 | ||
177 | ||
178 | let mcode_contain_plus = function | |
179 | | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false | |
180 | | Ast_cocci.CONTEXT _ -> true | |
485bce71 | 181 | (* patch: when need full coccinelle transformation *) |
8babbc8f C |
182 | | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false |
183 | | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*) | |
951c7801 | 184 | | Ast_cocci.PLUS _ -> raise Impossible |
34e49164 | 185 | |
ae4735db | 186 | let contain_plus info = |
34e49164 C |
187 | let mck = Ast_c.mcode_of_info info in |
188 | mcode_contain_plus mck | |
189 | ||
190 | (*****************************************************************************) | |
191 | (* Last fix on the ast *) | |
192 | (*****************************************************************************) | |
193 | ||
ae4735db | 194 | (* Because of the ugly trick to handle initialiser, I generate fake ',' |
34e49164 C |
195 | * for the last initializer element, but if there is nothing around it, |
196 | * I don't want in the end to print it. | |
197 | *) | |
198 | ||
ae4735db | 199 | let remove_useless_fakeInfo_struct program = |
34e49164 | 200 | let bigf = { Visitor_c.default_visitor_c_s with |
ae4735db | 201 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
34e49164 | 202 | match k ini with |
113803cf | 203 | | InitList args, ii -> |
34e49164 C |
204 | (match ii with |
205 | | [_i1;_i2] -> ini | |
113803cf | 206 | | [i1;i2;iicommaopt] -> |
34e49164 C |
207 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) |
208 | && (Ast_c.is_fake iicommaopt) | |
209 | (* sometimes the guy put a normal iicommaopt *) | |
34e49164 C |
210 | then InitList args, [i1;i2] |
211 | else InitList args, [i1;i2;iicommaopt] | |
ae4735db | 212 | | [i1;i2;iicommaopt;end_comma_opt] -> |
113803cf C |
213 | (* only in #define. end_comma_opt canot be fake *) |
214 | (* not sure if this will be considered ambiguous with a previous | |
215 | case? *) | |
216 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) | |
217 | && (Ast_c.is_fake iicommaopt) | |
218 | (* sometimes the guy put a normal iicommaopt *) | |
219 | then InitList args, [i1;i2;end_comma_opt] | |
220 | else InitList args, [i1;i2;iicommaopt;end_comma_opt] | |
34e49164 C |
221 | | _ -> raise Impossible |
222 | ) | |
223 | | x -> x | |
224 | ) | |
225 | } in | |
226 | Visitor_c.vk_toplevel_s bigf program | |
227 | ||
228 | ||
229 | (*****************************************************************************) | |
230 | (* Tokens1 generation *) | |
231 | (*****************************************************************************) | |
232 | ||
ae4735db C |
233 | let get_fakeInfo_and_tokens celem toks = |
234 | let toks_in = ref toks in | |
34e49164 C |
235 | let toks_out = ref [] in |
236 | ||
237 | (* todo? verify good order of position ? *) | |
ae4735db | 238 | let pr_elem info = |
34e49164 | 239 | match Ast_c.pinfo_of_info info with |
ae4735db | 240 | | FakeTok _ -> |
34e49164 | 241 | Common.push2 (Fake1 info) toks_out |
ae4735db | 242 | | OriginTok _ | ExpandedTok _ -> |
34e49164 | 243 | (* get the associated comments/space/cppcomment tokens *) |
978fd7e5 | 244 | let (before, x, after) = |
ae4735db | 245 | !toks_in +> Common.split_when (fun tok -> |
34e49164 C |
246 | info =*= TH.info_of_tok tok) |
247 | in | |
b1b2de81 | 248 | assert(info =*= TH.info_of_tok x); |
34e49164 | 249 | (*old: assert(before +> List.for_all (TH.is_comment)); *) |
ae4735db | 250 | before +> List.iter (fun x -> |
34e49164 | 251 | if not (TH.is_comment x) |
0708f913 | 252 | then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) |
34e49164 C |
253 | (* case such as int asm d3("x"); not yet in ast *) |
254 | ); | |
255 | before +> List.iter (fun x -> Common.push2 (T1 x) toks_out); | |
256 | push2 (T1 x) toks_out; | |
257 | toks_in := after; | |
ae4735db | 258 | | AbstractLineTok _ -> |
34e49164 C |
259 | (* can be called on type info when for instance use -type_c *) |
260 | if !Flag_parsing_c.pretty_print_type_info | |
261 | then Common.push2 (Fake1 info) toks_out | |
ae4735db | 262 | else raise Impossible (* at this stage *) |
34e49164 C |
263 | in |
264 | ||
265 | let pr_space _ = () in (* use the spacing that is there already *) | |
266 | ||
267 | Pretty_print_c.pp_program_gen pr_elem pr_space celem; | |
268 | ||
269 | if not (null !toks_in) | |
0708f913 | 270 | then failwith "WEIRD: unparsing not finished"; |
34e49164 C |
271 | |
272 | List.rev !toks_out | |
273 | ||
8babbc8f | 274 | (* Fake nodes that have BEFORE code or are - should be moved over any subsequent |
34e49164 | 275 | whitespace and newlines, but not any comments, to get as close to the affected |
002099fc | 276 | code as possible. Similarly, fake nodes that have AFTER code should be moved |
34e49164 C |
277 | backwards. No fake nodes should have both before and after code. *) |
278 | ||
279 | let displace_fake_nodes toks = | |
280 | let is_fake = function Fake1 _ -> true | _ -> false in | |
281 | let is_whitespace = function | |
282 | T1(Parser_c.TCommentSpace _) | |
485bce71 | 283 | (* patch: cocci *) |
34e49164 C |
284 | | T1(Parser_c.TCommentNewline _) -> true |
285 | | _ -> false in | |
286 | let rec loop toks = | |
287 | let fake_info = | |
288 | try Some (Common.split_when is_fake toks) | |
289 | with Not_found -> None in | |
290 | match fake_info with | |
291 | Some(bef,((Fake1 info) as fake),aft) -> | |
292 | (match !(info.cocci_tag) with | |
ae4735db | 293 | | Some x -> |
708f4980 | 294 | (match x with |
8babbc8f C |
295 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) |
296 | | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) -> | |
34e49164 | 297 | (* move the fake node forwards *) |
002099fc C |
298 | let (whitespace,rest) = Common.span is_whitespace aft in |
299 | bef @ whitespace @ fake :: (loop rest) | |
300 | | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) -> | |
34e49164 | 301 | (* move the fake node backwards *) |
002099fc C |
302 | let revbef = List.rev bef in |
303 | let (revwhitespace,revprev) = Common.span is_whitespace revbef in | |
304 | let whitespace = List.rev revwhitespace in | |
305 | let prev = List.rev revprev in | |
306 | prev @ fake :: (loop (whitespace @ aft)) | |
307 | | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) -> | |
308 | bef @ fake :: (loop aft) | |
309 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) -> | |
310 | failwith "fake node should not be before-after" | |
311 | | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) | |
485bce71 | 312 | ) |
ae4735db | 313 | | None -> |
708f4980 C |
314 | bef @ fake :: (loop aft) |
315 | ) | |
34e49164 C |
316 | | None -> toks |
317 | | _ -> raise Impossible in | |
318 | loop toks | |
319 | ||
320 | (*****************************************************************************) | |
321 | (* Tokens2 generation *) | |
322 | (*****************************************************************************) | |
323 | ||
b1b2de81 | 324 | let comment2t2 = function |
5636bb2c C |
325 | (Token_c.TCommentCpp |
326 | (* not sure iif the following list is exhaustive or complete *) | |
327 | (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError), | |
328 | (info : Token_c.info)) -> | |
329 | C2(info.Common.str) | |
330 | | (Token_c.TCommentCpp x,(info : Token_c.info)) -> | |
b1b2de81 C |
331 | C2("\n"^info.Common.str^"\n") |
332 | | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x)) | |
333 | ||
ae4735db | 334 | let expand_mcode toks = |
34e49164 C |
335 | let toks_out = ref [] in |
336 | ||
337 | let index = ref 0 in | |
338 | ||
ae4735db | 339 | let add_elem t minus = |
34e49164 | 340 | match t with |
ae4735db | 341 | | Fake1 info -> |
34e49164 | 342 | let str = Ast_c.str_of_info info in |
b1b2de81 | 343 | if str =$= "" |
8babbc8f | 344 | then push2 (Fake2 minus) toks_out |
c491d8ee C |
345 | (* fx the fake "," at the end of a structure or enum. |
346 | no idea what other fake info there can be... *) | |
347 | else push2 (Comma str) toks_out | |
ae4735db C |
348 | |
349 | ||
002099fc | 350 | | T1 tok -> |
b1b2de81 | 351 | (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) |
34e49164 | 352 | (* no tag on expandedTok ! *) |
fc1ad971 C |
353 | let modified = function |
354 | None -> false | |
355 | | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false | |
356 | | _ -> true in | |
357 | (if (TH.is_expanded tok && | |
358 | modified !((TH.info_of_tok tok).cocci_tag) | |
359 | (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)) | |
0708f913 C |
360 | then |
361 | failwith | |
362 | (Printf.sprintf | |
363 | "expanded token %s on line %d is either modified or stored in a metavariable" | |
364 | (TH.str_of_tok tok) (TH.line_of_tok tok))); | |
34e49164 | 365 | |
ae4735db | 366 | let tok' = tok +> TH.visitor_info_of_tok (fun i -> |
34e49164 C |
367 | { i with cocci_tag = ref Ast_c.emptyAnnot; } |
368 | ) in | |
369 | ||
ae4735db | 370 | let optindex = |
34e49164 C |
371 | if TH.is_origin tok && not (TH.is_real_comment tok) |
372 | then begin | |
373 | incr index; | |
374 | Some !index | |
375 | end | |
376 | else None | |
377 | in | |
378 | ||
379 | push2 (T2 (tok', minus, optindex)) toks_out | |
380 | in | |
381 | ||
ae4735db C |
382 | let expand_info t = |
383 | let (mcode,env) = | |
708f4980 | 384 | Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in |
34e49164 | 385 | |
5636bb2c C |
386 | let pr_cocci s ln col rcol hint = |
387 | push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in | |
ae4735db | 388 | let pr_c info = |
b1b2de81 C |
389 | (match Ast_c.pinfo_of_info info with |
390 | Ast_c.AbstractLineTok _ -> | |
391 | push2 (C2 (Ast_c.str_of_info info)) toks_out | |
392 | | Ast_c.FakeTok (s,_) -> | |
393 | push2 (C2 s) toks_out | |
34e49164 C |
394 | | _ -> |
395 | Printf.printf "line: %s\n" (Common.dump info); | |
b1b2de81 C |
396 | failwith "not an abstract line"); |
397 | (!(info.Ast_c.comments_tag)).Ast_c.mafter +> | |
398 | List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in | |
399 | ||
708f4980 | 400 | let pr_barrier ln col = (* marks a position, used around C code *) |
5636bb2c | 401 | push2 (Cocci2("",ln,col,col,None)) toks_out in |
708f4980 | 402 | let pr_nobarrier ln col = () in (* not needed for linux spacing *) |
b1b2de81 | 403 | |
708f4980 | 404 | let pr_cspace _ = push2 (C2 " ") toks_out in |
34e49164 | 405 | |
708f4980 C |
406 | let pr_space _ = () (* rely on add_space in cocci code *) in |
407 | let pr_arity _ = () (* not interested *) in | |
34e49164 C |
408 | |
409 | let indent _ = push2 Indent_cocci2 toks_out in | |
c3e37e97 | 410 | let unindent x = push2 (Unindent_cocci2 x) toks_out in |
34e49164 | 411 | |
708f4980 C |
412 | let args_pp = |
413 | (env, pr_cocci, pr_c, pr_cspace, | |
414 | (match !Flag_parsing_c.spacing with | |
415 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace), | |
416 | pr_arity, | |
417 | (match !Flag_parsing_c.spacing with | |
418 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), | |
419 | indent, unindent) in | |
34e49164 | 420 | |
ae4735db C |
421 | (* old: when for yacfe with partial cocci: |
422 | * add_elem t false; | |
485bce71 | 423 | *) |
34e49164 | 424 | |
485bce71 | 425 | (* patch: when need full coccinelle transformation *) |
faf9a90c | 426 | let unparser = Unparse_cocci.pp_list_list_any args_pp false in |
34e49164 | 427 | match mcode with |
ae4735db | 428 | | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> |
34e49164 | 429 | (* Why adding ? because I want to have all the information, the whole |
ae4735db | 430 | * set of tokens, so I can then process and remove the |
34e49164 | 431 | * is_between_two_minus for instance *) |
708f4980 | 432 | add_elem t (Min (inst,adj)); |
8babbc8f C |
433 | (match any_xxs with |
434 | Ast_cocci.NOREPLACEMENT -> () | |
435 | | Ast_cocci.REPLACEMENT(any_xxs,_) -> | |
436 | unparser any_xxs Unparse_cocci.InPlace) | |
ae4735db | 437 | | Ast_cocci.CONTEXT (_,any_befaft) -> |
34e49164 | 438 | (match any_befaft with |
ae4735db | 439 | | Ast_cocci.NOTHING -> |
708f4980 | 440 | add_elem t Ctx |
951c7801 | 441 | | Ast_cocci.BEFORE (xxs,_) -> |
faf9a90c | 442 | unparser xxs Unparse_cocci.Before; |
708f4980 | 443 | add_elem t Ctx |
ae4735db | 444 | | Ast_cocci.AFTER (xxs,_) -> |
708f4980 | 445 | add_elem t Ctx; |
faf9a90c | 446 | unparser xxs Unparse_cocci.After; |
ae4735db | 447 | | Ast_cocci.BEFOREAFTER (xxs, yys, _) -> |
faf9a90c | 448 | unparser xxs Unparse_cocci.Before; |
708f4980 | 449 | add_elem t Ctx; |
faf9a90c | 450 | unparser yys Unparse_cocci.After; |
34e49164 | 451 | ) |
951c7801 | 452 | | Ast_cocci.PLUS _ -> raise Impossible |
34e49164 C |
453 | |
454 | in | |
455 | ||
456 | toks +> List.iter expand_info; | |
457 | List.rev !toks_out | |
ae4735db | 458 | |
34e49164 C |
459 | |
460 | (*****************************************************************************) | |
461 | (* Tokens2 processing, filtering, adjusting *) | |
462 | (*****************************************************************************) | |
463 | ||
002099fc | 464 | let is_space = function |
fc1ad971 | 465 | | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *) |
ae4735db | 466 | | _ -> false |
fc1ad971 C |
467 | |
468 | let is_newline = function | |
469 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
470 | | _ -> false | |
471 | ||
472 | let is_whitespace = function | |
ae4735db | 473 | | (T2 (t,_b,_i)) -> |
002099fc C |
474 | (match t with |
475 | | Parser_c.TCommentSpace _ -> true (* only whitespace *) | |
fc1ad971 | 476 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
002099fc C |
477 | | _ -> false |
478 | ) | |
ae4735db | 479 | | _ -> false |
002099fc | 480 | |
34e49164 | 481 | let is_minusable_comment = function |
ae4735db | 482 | | (T2 (t,_b,_i)) -> |
34e49164 C |
483 | (match t with |
484 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
ae4735db | 485 | (* patch: coccinelle *) |
002099fc | 486 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
7f004419 | 487 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false |
002099fc C |
488 | | Parser_c.TComment _ |
489 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
b1b2de81 C |
490 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
491 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *) | |
34e49164 C |
492 | -> true |
493 | ||
b1b2de81 | 494 | | Parser_c.TCommentMisc _ |
0708f913 | 495 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) |
34e49164 C |
496 | -> false |
497 | ||
498 | | _ -> false | |
499 | ) | |
ae4735db | 500 | | _ -> false |
34e49164 | 501 | |
fc1ad971 | 502 | let is_minusable_comment_nocpp = function |
ae4735db | 503 | | (T2 (t,_b,_i)) -> |
fc1ad971 C |
504 | (match t with |
505 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
ae4735db | 506 | (* patch: coccinelle *) |
fc1ad971 | 507 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
7f004419 | 508 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false |
fc1ad971 C |
509 | | Parser_c.TComment _ -> true |
510 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
511 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) | |
512 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
513 | -> false | |
514 | ||
515 | | Parser_c.TCommentMisc _ | |
516 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) | |
517 | -> false | |
518 | ||
519 | | _ -> false | |
520 | ) | |
ae4735db | 521 | | _ -> false |
fc1ad971 | 522 | |
34e49164 | 523 | let all_coccis = function |
c491d8ee | 524 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true |
34e49164 C |
525 | | _ -> false |
526 | ||
b1b2de81 C |
527 | (*previously gave up if the first character was a newline, but not clear why*) |
528 | let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x | |
34e49164 | 529 | |
708f4980 | 530 | let set_minus_comment adj = function |
ae4735db | 531 | | T2 (t,Ctx,idx) -> |
34e49164 C |
532 | let str = TH.str_of_tok t in |
533 | (match t with | |
534 | | Parser_c.TCommentSpace _ | |
ae4735db | 535 | (* patch: coccinelle *) |
34e49164 C |
536 | | Parser_c.TCommentNewline _ -> () |
537 | ||
ae4735db C |
538 | | Parser_c.TComment _ |
539 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
b1b2de81 C |
540 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
541 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
ae4735db | 542 | -> |
b1b2de81 C |
543 | pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" |
544 | (TH.line_of_tok t) str) | |
34e49164 C |
545 | | _ -> raise Impossible |
546 | ); | |
708f4980 | 547 | T2 (t, Min adj, idx) |
ae4735db | 548 | (* patch: coccinelle *) |
fc1ad971 | 549 | | T2 (t,Min adj,idx) as x -> x |
8babbc8f | 550 | | Fake2 _ as x -> x |
34e49164 C |
551 | | _ -> raise Impossible |
552 | ||
708f4980 | 553 | let set_minus_comment_or_plus adj = function |
c491d8ee | 554 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x |
708f4980 | 555 | | x -> set_minus_comment adj x |
34e49164 | 556 | |
fc1ad971 C |
557 | let drop_minus xs = |
558 | xs +> Common.exclude (function | |
559 | | T2 (t,Min adj,_) -> true | |
560 | | _ -> false | |
561 | ) | |
562 | ||
8babbc8f | 563 | let drop_expanded xs = |
5626f154 | 564 | xs +> Common.exclude (function |
34e49164 | 565 | | T2 (t,_,_) when TH.is_expanded t -> true |
8babbc8f C |
566 | | _ -> false |
567 | ) | |
568 | ||
569 | let drop_fake xs = | |
570 | xs +> Common.exclude (function | |
571 | | Fake2 _ -> true | |
34e49164 C |
572 | | _ -> false |
573 | ) | |
5626f154 C |
574 | |
575 | let remove_minus_and_between_and_expanded_and_fake xs = | |
576 | ||
8babbc8f C |
577 | (* get rid of expanded tok *) |
578 | let xs = drop_expanded xs in | |
34e49164 | 579 | |
113803cf | 580 | let minus_or_comment = function |
708f4980 | 581 | T2(_,Min adj,_) -> true |
113803cf C |
582 | | x -> is_minusable_comment x in |
583 | ||
fc1ad971 C |
584 | let minus_or_comment_nocpp = function |
585 | T2(_,Min adj,_) -> true | |
586 | | x -> is_minusable_comment_nocpp x in | |
587 | ||
588 | let common_adj (index1,adj1) (index2,adj2) = | |
8babbc8f C |
589 | let same_adj = (* same adjacency info *) |
590 | match (adj1,adj2) with | |
591 | (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2 | |
592 | | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in | |
593 | same_adj && | |
fc1ad971 C |
594 | (* non-empty intersection of witness trees *) |
595 | not ((Common.inter_set index1 index2) = []) in | |
596 | ||
fc1ad971 C |
597 | (* new idea: collects regions not containing non-space context code |
598 | if two adjacent adjacent minus tokens satisfy common_adj then delete | |
599 | all spaces, comments etc between them | |
600 | if two adjacent minus tokens do not satisfy common_adj only delete | |
601 | the spaces between them if there are no comments, etc. | |
602 | if the region contain no plus code and is both preceded and followed | |
603 | by a newline, delete the initial newline. *) | |
604 | ||
605 | let rec adjust_around_minus = function | |
34e49164 | 606 | [] -> [] |
708f4980 | 607 | | (T2(Parser_c.TCommentNewline c,_b,_i) as x):: |
8babbc8f | 608 | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs -> |
fc1ad971 C |
609 | let (minus_list,rest) = Common.span not_context (t1::xs) in |
610 | let contains_plus = List.exists is_plus minus_list in | |
611 | let x = | |
612 | match List.rev minus_list with | |
613 | (T2(Parser_c.TCommentNewline c,_b,_i))::rest | |
614 | when List.for_all minus_or_comment minus_list -> | |
615 | set_minus_comment_or_plus adj1 x | |
616 | | _ -> x in | |
617 | x :: adjust_within_minus contains_plus minus_list @ | |
618 | adjust_around_minus rest | |
8babbc8f | 619 | | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs -> |
fc1ad971 C |
620 | let (minus_list,rest) = Common.span not_context (t1::xs) in |
621 | let contains_plus = List.exists is_plus minus_list in | |
622 | adjust_within_minus contains_plus minus_list @ adjust_around_minus rest | |
c491d8ee C |
623 | | x::xs -> |
624 | x :: adjust_around_minus xs | |
fc1ad971 | 625 | and adjust_within_minus cp (* contains plus *) = function |
8babbc8f | 626 | ((Fake2(Min adj1) | T2(_,Min adj1,_)) as t1)::xs -> |
fc1ad971 C |
627 | let not_minus = function T2(_,Min _,_) -> false | _ -> true in |
628 | let (not_minus_list,rest) = Common.span not_minus xs in | |
629 | t1 :: | |
34e49164 | 630 | (match rest with |
8babbc8f C |
631 | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs |
632 | when common_adj adj1 adj2 -> | |
fc1ad971 C |
633 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) |
634 | @ (adjust_within_minus cp (t2::xs)) | |
8babbc8f | 635 | | ((Fake2(Min adj2) | T2(_,Min adj2,_)) as t2)::xs -> |
5626f154 | 636 | if not cp && List.for_all is_whitespace not_minus_list |
fc1ad971 C |
637 | then |
638 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) | |
639 | @ (adjust_within_minus cp (t2::xs)) | |
5626f154 C |
640 | else |
641 | not_minus_list @ (adjust_within_minus cp (t2::xs)) | |
fc1ad971 C |
642 | | _ -> |
643 | if cp | |
644 | then xs | |
645 | else | |
8babbc8f C |
646 | (* remove spaces after removed stuff, eg a comma after a |
647 | function argument *) | |
fc1ad971 C |
648 | let (spaces,rest) = Common.span is_space xs in |
649 | (List.map (set_minus_comment_or_plus adj1) spaces) | |
650 | @ rest) | |
651 | | xs -> failwith "should always start with minus" | |
652 | and not_context = function | |
653 | (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false | |
654 | | _ -> true | |
655 | and is_plus = function | |
c491d8ee | 656 | C2 _ | Comma _ | Cocci2 _ -> true |
fc1ad971 | 657 | | _ -> false in |
34e49164 | 658 | |
fc1ad971 | 659 | let xs = adjust_around_minus xs in |
34e49164 | 660 | |
8babbc8f C |
661 | (* get rid of fake tok *) |
662 | let xs = drop_fake xs in | |
663 | ||
113803cf | 664 | (* this drops blank lines after a brace introduced by removing code *) |
fc1ad971 C |
665 | let minus_or_comment_nonl = function |
666 | T2(_,Min adj,_) -> true | |
667 | | T2(Parser_c.TCommentNewline _,_b,_i) -> false | |
668 | | x -> is_minusable_comment x in | |
669 | ||
113803cf C |
670 | let rec adjust_after_brace = function |
671 | [] -> [] | |
708f4980 | 672 | | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs) |
b1b2de81 | 673 | when str_of_token2 x =$= "{" -> |
fc1ad971 | 674 | let (between_minus,rest) = Common.span minus_or_comment_nonl xs in |
113803cf C |
675 | let is_whitespace = function |
676 | T2(Parser_c.TCommentSpace _,_b,_i) | |
677 | (* patch: cocci *) | |
678 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
679 | | _ -> false in | |
680 | let (newlines,rest) = Common.span is_whitespace rest in | |
681 | let (drop_newlines,last_newline) = | |
682 | let rec loop = function | |
683 | [] -> ([],[]) | |
684 | | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest -> | |
685 | (List.rev rest,[x]) | |
686 | | x::xs -> | |
687 | let (drop_newlines,last_newline) = loop xs in | |
688 | (drop_newlines,x::last_newline) in | |
689 | loop (List.rev newlines) in | |
708f4980 | 690 | x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@ |
113803cf C |
691 | last_newline@ |
692 | adjust_after_brace rest | |
693 | | x::xs -> x::adjust_after_brace xs in | |
694 | ||
695 | let xs = adjust_after_brace xs in | |
696 | ||
fc1ad971 C |
697 | (* search backwards from context } over spaces until reaching a newline. |
698 | then go back over all minus code until reaching some context or + code. | |
699 | get rid of all intervening spaces, newlines, and comments | |
700 | input is reversed *) | |
701 | let rec adjust_before_brace = function | |
702 | [] -> [] | |
703 | | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x -> | |
704 | let (outer_spaces,rest) = Common.span is_space xs in | |
705 | x :: outer_spaces @ | |
706 | (match rest with | |
707 | ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) :: | |
708 | (* the rest of this code is the same as from_newline below | |
709 | but merging them seems to be error prone... *) | |
710 | ((T2 (t, Min adj, idx)) as m) :: rest -> | |
711 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
712 | h :: m :: | |
713 | (List.map (set_minus_comment adj) spaces) @ | |
714 | (adjust_before_brace rest) | |
715 | | _ -> adjust_before_brace rest) | |
716 | | x::xs -> x :: (adjust_before_brace xs) in | |
717 | ||
718 | let from_newline = function | |
719 | ((T2 (t, Min adj, idx)) as m) :: rest -> | |
720 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
721 | m :: | |
722 | (List.map (set_minus_comment adj) spaces) @ | |
723 | (adjust_before_brace rest) | |
c491d8ee C |
724 | | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest |
725 | when TH.str_of_tok t0 = "" -> | |
726 | (* This is for the case of a #define that is completely deleted, | |
727 | because a #define has a strange EOL token at the end. | |
728 | We hope there i no other kind of token that is represented by | |
729 | "", but it seems like changing the kind of token might break | |
730 | the end of entity recognition in the C parser. | |
731 | See parsing_hacks.ml *) | |
732 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
733 | m0 :: m :: | |
734 | (List.map (set_minus_comment adj) spaces) @ | |
735 | (adjust_before_brace rest) | |
fc1ad971 C |
736 | | rest -> adjust_before_brace rest in |
737 | ||
738 | let xs = List.rev (from_newline (List.rev xs)) in | |
739 | let xs = drop_minus xs in | |
34e49164 C |
740 | xs |
741 | ||
faf9a90c C |
742 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
743 | let adjust_before_semicolon toks = | |
744 | let toks = List.rev toks in | |
002099fc | 745 | let rec search_semic = function |
faf9a90c | 746 | [] -> [] |
002099fc | 747 | | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs -> |
faf9a90c | 748 | if List.mem (str_of_token2 x) [";";")";","] |
8babbc8f | 749 | then x :: search_semic (search_minus false xs) |
002099fc C |
750 | else x :: search_semic xs |
751 | | x::xs -> x :: search_semic xs | |
752 | and search_minus seen_minus xs = | |
753 | let (spaces, rest) = Common.span is_space xs in | |
754 | (* only delete spaces if something is actually deleted *) | |
755 | match rest with | |
756 | ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest | |
757 | | _ -> if seen_minus then rest else xs in | |
758 | List.rev (search_semic toks) | |
34e49164 | 759 | |
8babbc8f C |
760 | (* normally, in C code, a ( is not followed by a space or newline *) |
761 | let adjust_after_paren toks = | |
762 | let rec search_paren = function | |
763 | [] -> [] | |
764 | | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs -> | |
765 | if List.mem (str_of_token2 x) ["("] (* other things? *) | |
766 | then x :: search_paren(search_minus false xs) | |
767 | else x :: search_paren xs | |
768 | | x::xs -> x :: search_paren xs | |
769 | and search_minus seen_minus xs = | |
65038c61 | 770 | let (spaces, rest) = Common.span is_whitespace xs in |
8babbc8f C |
771 | (* only delete spaces if something is actually deleted *) |
772 | match rest with | |
773 | ((T2(_,Min _,_)) as a)::rerest -> (* minus *) | |
774 | a :: search_minus true rerest | |
775 | | ((T2(_,Ctx,_)) as a)::rerest when str_of_token2 a = "," -> | |
776 | (* comma after ( will be deleted, so consider it as minus code | |
777 | already *) | |
778 | a :: search_minus true rerest | |
779 | | _ -> if seen_minus then rest else xs in (* drop trailing space *) | |
780 | search_paren toks | |
781 | ||
65038c61 C |
782 | (* this is for the case where braces are added around an if branch *) |
783 | let paren_then_brace toks = | |
784 | let rec search_paren = function | |
785 | [] -> [] | |
786 | | ((T2(_,Ctx,_)) as x)::xs -> | |
787 | if List.mem (str_of_token2 x) [")"] | |
788 | then x :: search_paren(search_plus xs) | |
789 | else x :: search_paren xs | |
790 | | x::xs -> x :: search_paren xs | |
791 | and search_plus xs = | |
792 | let (spaces, rest) = Common.span is_whitespace xs in | |
793 | match rest with | |
794 | (* move the brace up to the previous line *) | |
795 | ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) -> | |
796 | (C2 " ") :: x :: spaces @ rest | |
797 | | _ -> xs in | |
798 | search_paren toks | |
799 | ||
34e49164 C |
800 | let is_ident_like s = s ==~ Common.regexp_alpha |
801 | ||
aba5c457 C |
802 | let rec drop_space_at_endline = function |
803 | [] -> [] | |
804 | | [x] -> [x] | |
190f1acf | 805 | | (C2 " ") :: |
8f657093 C |
806 | ((((T2(Parser_c.TCommentSpace _,Ctx,_)) | Cocci2("\n",_,_,_,_) | |
807 | (T2(Parser_c.TCommentNewline _,Ctx,_))) :: _) as rest) -> | |
190f1acf C |
808 | (* when unparse_cocci doesn't know whether space is needed *) |
809 | drop_space_at_endline rest | |
aba5c457 C |
810 | | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest -> |
811 | let (outer_spaces,rest) = Common.span is_space rest in | |
812 | let minus_or_comment_or_space_nocpp = function | |
813 | T2(_,Min adj,_) -> true | |
814 | | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true | |
18b1275a C |
815 | | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false |
816 | | x -> false in | |
aba5c457 | 817 | let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in |
18b1275a C |
818 | let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in |
819 | if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus | |
820 | then | |
821 | match rest with | |
822 | ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest -> | |
823 | (* drop trailing spaces *) | |
824 | minus@a::(drop_space_at_endline rest) | |
825 | | _ -> fail() | |
826 | else fail() | |
8f657093 C |
827 | | a :: rest -> |
828 | a :: drop_space_at_endline rest | |
aba5c457 C |
829 | |
830 | (* if a removed ( is between two tokens, then add a space *) | |
831 | let rec paren_to_space = function | |
832 | [] -> [] | |
833 | | [x] -> [x] | |
834 | | [x;y] -> [x;y] | |
835 | | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest | |
836 | when not (is_whitespace a) && TH.str_of_tok t = "(" -> | |
aba5c457 C |
837 | a :: b :: (C2 " ") :: (paren_to_space (c :: rest)) |
838 | | a :: rest -> a :: (paren_to_space rest) | |
839 | ||
ae4735db | 840 | let rec add_space xs = |
34e49164 C |
841 | match xs with |
842 | | [] -> [] | |
843 | | [x] -> [x] | |
5636bb2c | 844 | | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
708f4980 C |
845 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && |
846 | not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly -> | |
847 | (* this only works within a line. could consider whether | |
848 | something should be done to add newlines too, rather than | |
849 | printing them explicitly in unparse_cocci. *) | |
850 | x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) | |
65038c61 C |
851 | | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
852 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && | |
853 | not (lnx = -1) && lnx < lny && not (rcolx = -1) -> | |
854 | (* this only works within a line. could consider whether | |
855 | something should be done to add newlines too, rather than | |
856 | printing them explicitly in unparse_cocci. *) | |
857 | x::C2 (String.make (lny-lnx) '\n'):: | |
858 | C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *) | |
859 | add_space (y::xs) | |
1eddfd50 C |
860 | | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *) |
861 | let sx = str_of_token2 x in | |
862 | let sy = str_of_token2 y in | |
863 | if is_ident_like sx && (is_ident_like sy or List.mem sy ["="]) | |
864 | then x::C2 " "::(add_space (y::xs)) | |
865 | else x::(add_space (y::xs)) | |
866 | | x::y::xs -> (* not boundary, not sure if it is possible *) | |
34e49164 C |
867 | let sx = str_of_token2 x in |
868 | let sy = str_of_token2 y in | |
faf9a90c | 869 | if is_ident_like sx && is_ident_like sy |
34e49164 C |
870 | then x::C2 " "::(add_space (y::xs)) |
871 | else x::(add_space (y::xs)) | |
872 | ||
c491d8ee C |
873 | (* A fake comma is added at the end of an unordered initlist or a enum |
874 | decl, if the initlist or enum doesn't already end in a comma. This is only | |
875 | needed if there is + code, ie if we see Cocci after it in the code sequence *) | |
876 | ||
877 | let rec drop_end_comma = function | |
878 | [] -> [] | |
879 | | [x] -> [x] | |
880 | | ((Comma ",") as x) :: rest -> | |
881 | let (newlines,rest2) = Common.span is_whitespace rest in | |
882 | (match rest2 with | |
883 | (Cocci2 _) :: _ -> x :: drop_end_comma rest | |
884 | | _ -> drop_end_comma rest) | |
885 | | x :: xs -> x :: drop_end_comma xs | |
886 | ||
5636bb2c C |
887 | (* The following only works for the outermost function call. Stack records |
888 | the column of all open parentheses. Space_cell contains the most recent | |
889 | comma in the outermost function call. The goal is to decide whether this | |
890 | should be followed by a space or a newline and indent. *) | |
891 | let add_newlines toks tabbing_unit = | |
892 | let create_indent n = | |
893 | let (tu,tlen) = | |
894 | match tabbing_unit with | |
895 | Some ("\t",_) -> ("\t",8) | |
896 | | Some ("",_) -> ("\t",8) (* not sure why... *) | |
897 | | Some (s,_) -> (s,String.length s) (* assuming only spaces *) | |
898 | | None -> ("\t",8) in | |
899 | let rec loop seen = | |
900 | if seen + tlen <= n | |
901 | then tu ^ loop (seen + tlen) | |
902 | else String.make (n-seen) ' ' in | |
903 | loop 0 in | |
904 | let check_for_newline count x = function | |
905 | Some (start,space_cell) when count > Flag_parsing_c.max_width -> | |
906 | space_cell := "\n"^(create_indent x); | |
907 | Some (x + (count - start)) | |
908 | | _ -> None in | |
909 | (* the following is for strings that may contain newline *) | |
910 | let string_length s count = | |
911 | let l = list_of_string s in | |
912 | List.fold_left | |
913 | (function count -> | |
914 | function | |
915 | '\t' -> count + 8 | |
916 | | '\n' -> 0 | |
917 | | c -> count + 1) | |
918 | count l in | |
919 | let rec loop info count = function | |
920 | [] -> [] | |
921 | | ((T2(tok,_,_)) as a)::xs -> | |
922 | a :: loop info (string_length (TH.str_of_tok tok) count) xs | |
923 | | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs -> | |
924 | let (stack,space_cell) = info in | |
925 | let rest = | |
926 | match hint with | |
f537ebc4 | 927 | None -> loop info (string_length s count) xs |
5636bb2c | 928 | | Some Unparse_cocci.StartBox -> |
f537ebc4 | 929 | let count = string_length s count in |
5636bb2c C |
930 | loop (count::stack,space_cell) count xs |
931 | | Some Unparse_cocci.EndBox -> | |
f537ebc4 | 932 | let count = string_length s count in |
5636bb2c C |
933 | (match stack with |
934 | [x] -> | |
935 | (match check_for_newline count x space_cell with | |
936 | Some count -> loop ([],None) count xs | |
937 | | None -> loop ([],None) count xs) | |
938 | | _ -> loop (List.tl stack,space_cell) count xs) | |
939 | | Some (Unparse_cocci.SpaceOrNewline sp) -> | |
f537ebc4 | 940 | let count = string_length s (count + 1 (*space*)) in |
5636bb2c C |
941 | (match stack with |
942 | [x] -> | |
943 | (match check_for_newline count x space_cell with | |
944 | Some count -> loop (stack,Some (x,sp)) count xs | |
945 | | None -> loop (stack,Some (count,sp)) count xs) | |
946 | | _ -> loop info count xs) in | |
947 | a :: rest | |
948 | | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs | |
c491d8ee | 949 | | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs |
8babbc8f | 950 | | Fake2 _ :: _ | Indent_cocci2 :: _ |
5636bb2c C |
951 | | Unindent_cocci2 _::_ -> |
952 | failwith "unexpected fake, indent, or unindent" in | |
953 | let redo_spaces prev = function | |
954 | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) -> | |
955 | C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev | |
956 | | t -> t::prev in | |
957 | (match !Flag_parsing_c.spacing with | |
958 | Flag_parsing_c.SMPL -> toks | |
959 | | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))) | |
34e49164 C |
960 | |
961 | (* When insert some new code, because of a + in a SP, we must add this | |
962 | * code at the right place, with the good indentation. So each time we | |
963 | * encounter some spacing info, with some newline, we maintain the | |
964 | * current indentation level used. | |
ae4735db | 965 | * |
34e49164 | 966 | * TODO problems: not accurate. ex: TODO |
ae4735db | 967 | * |
34e49164 C |
968 | * TODO: if in #define region, should add a \ \n |
969 | *) | |
ae4735db | 970 | let new_tabbing2 space = |
34e49164 C |
971 | (list_of_string space) |
972 | +> List.rev | |
b1b2de81 | 973 | +> Common.take_until (fun c -> c =<= '\n') |
34e49164 C |
974 | +> List.rev |
975 | +> List.map string_of_char | |
976 | +> String.concat "" | |
977 | ||
ae4735db | 978 | let new_tabbing a = |
34e49164 C |
979 | Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) |
980 | ||
981 | ||
ae4735db | 982 | let rec adjust_indentation xs = |
aa721442 | 983 | |
65038c61 | 984 | let _current_tabbing = ref ([] : string list) in |
34e49164 C |
985 | let tabbing_unit = ref None in |
986 | ||
987 | let string_of_list l = String.concat "" (List.map string_of_char l) in | |
988 | ||
989 | (* try to pick a tabbing unit for the plus code *) | |
990 | let adjust_tabbing_unit old_tab new_tab = | |
b1b2de81 | 991 | if !tabbing_unit =*= None && String.length new_tab > String.length old_tab |
34e49164 C |
992 | then |
993 | let old_tab = list_of_string old_tab in | |
994 | let new_tab = list_of_string new_tab in | |
995 | let rec loop = function | |
996 | ([],new_tab) -> | |
997 | tabbing_unit := Some(string_of_list new_tab,List.rev new_tab) | |
998 | | (_,[]) -> failwith "not possible" | |
999 | | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *) | |
1000 | loop (old_tab,new_tab) in | |
1001 | ||
65038c61 | 1002 | (* |
34e49164 C |
1003 | let remtab tu current_tab = |
1004 | let current_tab = List.rev(list_of_string current_tab) in | |
1005 | let rec loop = function | |
1006 | ([],new_tab) -> string_of_list (List.rev new_tab) | |
65038c61 C |
1007 | | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-) |
1008 | "" | |
b1b2de81 | 1009 | | (t::ts,n::ns) when t =<= n -> loop (ts,ns) |
65038c61 | 1010 | | (_,ns) -> (-* mismatch; remove what we can *-) |
34e49164 C |
1011 | string_of_list (List.rev ns) in |
1012 | loop (tu,current_tab) in | |
65038c61 | 1013 | *) |
34e49164 C |
1014 | |
1015 | let rec find_first_tab started = function | |
1016 | [] -> () | |
b1b2de81 | 1017 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> |
34e49164 | 1018 | find_first_tab true xs |
485bce71 | 1019 | (* patch: coccinelle *) |
34e49164 C |
1020 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_ |
1021 | when started -> | |
1022 | let s = str_of_token2 x +> new_tabbing in | |
1023 | tabbing_unit := Some (s,List.rev (list_of_string s)) | |
1024 | | x::xs -> find_first_tab started xs in | |
1025 | find_first_tab false xs; | |
1026 | ||
1eddfd50 C |
1027 | let rec balanced ct = function |
1028 | [] -> ct >= 0 | |
1029 | | ((T2(tok,_,_)) as x)::xs -> | |
1030 | (match str_of_token2 x with | |
1031 | "(" -> balanced (ct+1) xs | |
1032 | | ")" -> balanced (ct-1) xs | |
1033 | | _ -> balanced ct xs) | |
1034 | | x::xs -> balanced ct xs in | |
1035 | ||
ae4735db | 1036 | let rec aux started xs = |
34e49164 C |
1037 | match xs with |
1038 | | [] -> [] | |
485bce71 | 1039 | (* patch: coccinelle *) |
8f657093 C |
1040 | | (T2 (Parser_c.TCommentNewline _,_,_))::Unindent_cocci2(false)::xs |
1041 | | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs -> | |
1042 | (C2 "\n")::aux started xs | |
34e49164 | 1043 | | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _)):: |
5636bb2c | 1044 | ((Cocci2 ("{",_,_,_,_)) as a)::xs |
708f4980 | 1045 | when started && str_of_token2 x =$= ")" -> |
34e49164 | 1046 | (* to be done for if, etc, but not for a function header *) |
708f4980 | 1047 | x::(C2 " ")::a::(aux started xs) |
1eddfd50 C |
1048 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs |
1049 | when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) -> | |
ae4735db | 1050 | let old_tabbing = !_current_tabbing in |
65038c61 | 1051 | str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]); |
34e49164 | 1052 | (* only trust the indentation after the first { *) |
c3e37e97 | 1053 | (if started |
65038c61 C |
1054 | then |
1055 | adjust_tabbing_unit | |
1056 | (String.concat "" old_tabbing) | |
1057 | (String.concat "" !_current_tabbing)); | |
34e49164 C |
1058 | let coccis_rest = Common.span all_coccis xs in |
1059 | (match coccis_rest with | |
b1b2de81 | 1060 | (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" -> |
34e49164 C |
1061 | (* the case where cocci code has been added before a close } *) |
1062 | x::aux started (Indent_cocci2::xs) | |
1063 | | _ -> x::aux started xs) | |
65038c61 C |
1064 | | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
1065 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL -> | |
1066 | let tu = String.make (lcoly-1) ' ' in | |
1067 | _current_tabbing := tu::(!_current_tabbing); | |
1068 | C2 (tu)::aux started (y::xs) | |
34e49164 C |
1069 | | Indent_cocci2::xs -> |
1070 | (match !tabbing_unit with | |
1071 | None -> aux started xs | |
1072 | | Some (tu,_) -> | |
65038c61 C |
1073 | _current_tabbing := tu::(!_current_tabbing); |
1074 | (* can't be C2, for later phases *) | |
1075 | Cocci2 (tu,-1,-1,-1,None)::aux started xs) | |
c3e37e97 | 1076 | | Unindent_cocci2(permanent)::xs -> |
65038c61 C |
1077 | (match !_current_tabbing with |
1078 | [] -> aux started xs | |
1079 | | _::new_tabbing -> | |
1080 | _current_tabbing := new_tabbing; | |
34e49164 C |
1081 | aux started xs) |
1082 | (* border between existing code and cocci code *) | |
5636bb2c | 1083 | | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs |
b1b2de81 | 1084 | when str_of_token2 x =$= "{" -> |
34e49164 C |
1085 | x::aux true (y::Indent_cocci2::xs) |
1086 | | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs | |
b1b2de81 | 1087 | when str_of_token2 y =$= "}" -> |
c3e37e97 | 1088 | x::aux started (y::Unindent_cocci2 true::xs) |
34e49164 | 1089 | (* starting the body of the function *) |
b1b2de81 | 1090 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs |
5636bb2c | 1091 | | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs |
5636bb2c | 1092 | | ((Cocci2("\n",_,_,_,_)) as x)::xs -> |
0708f913 | 1093 | (* dont inline in expr because of weird eval order of ocaml *) |
65038c61 C |
1094 | let s = String.concat "" !_current_tabbing in |
1095 | (* can't be C2, for later phases *) | |
5636bb2c | 1096 | x::Cocci2 (s,-1,-1,-1,None)::aux started xs |
34e49164 | 1097 | | x::xs -> x::aux started xs in |
5636bb2c | 1098 | (aux false xs,!tabbing_unit) |
34e49164 C |
1099 | |
1100 | ||
1101 | let rec find_paren_comma = function | |
1102 | | [] -> () | |
1103 | ||
1104 | (* do nothing if was like this in original file *) | |
1105 | | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
ae4735db | 1106 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
1107 | find_paren_comma (x2::xs) |
1108 | ||
1109 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
ae4735db | 1110 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
1111 | find_paren_comma (x2::xs) |
1112 | ||
1113 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2) | |
ae4735db | 1114 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
1115 | find_paren_comma (x2::xs) |
1116 | ||
1117 | (* otherwise yes can adjust *) | |
ae4735db | 1118 | | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> |
34e49164 C |
1119 | x2.remove <- true; |
1120 | find_paren_comma (x2::xs) | |
ae4735db | 1121 | | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> |
34e49164 C |
1122 | x1.remove <- true; |
1123 | find_paren_comma (x2::xs) | |
1124 | ||
ae4735db | 1125 | | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> |
34e49164 C |
1126 | x1.remove <- true; |
1127 | find_paren_comma (x2::xs) | |
1128 | ||
ae4735db | 1129 | | x::xs -> |
34e49164 | 1130 | find_paren_comma xs |
34e49164 | 1131 | |
ae4735db C |
1132 | |
1133 | let fix_tokens toks = | |
34e49164 C |
1134 | let toks = toks +> List.map mk_token_extended in |
1135 | ||
1136 | let cleaner = toks +> Common.exclude (function | |
1137 | | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *) | |
1138 | | _ -> false | |
1139 | ) in | |
1140 | find_paren_comma cleaner; | |
1141 | ||
1142 | let toks = rebuild_tokens_extented toks in | |
1143 | toks +> List.map (fun x -> x.tok2) | |
1144 | ||
1145 | ||
1146 | ||
1147 | (*****************************************************************************) | |
1148 | (* Final unparsing (and debugging support) *) | |
1149 | (*****************************************************************************) | |
1150 | ||
1151 | (* for debugging *) | |
1152 | type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin | |
1153 | ||
1154 | let kind_of_token2 = function | |
8babbc8f | 1155 | | Fake2 _ -> KFake |
34e49164 C |
1156 | | Cocci2 _ -> KCocci |
1157 | | C2 _ -> KC | |
c491d8ee | 1158 | | Comma _ -> KC |
34e49164 C |
1159 | | T2 (t,_,_) -> |
1160 | (match TH.pinfo_of_tok t with | |
1161 | | ExpandedTok _ -> KExpanded | |
1162 | | OriginTok _ -> KOrigin | |
1163 | | FakeTok _ -> raise Impossible (* now a Fake2 *) | |
1164 | | AbstractLineTok _ -> raise Impossible (* now a KC *) | |
1165 | ) | |
c3e37e97 | 1166 | | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible |
ae4735db | 1167 | |
34e49164 C |
1168 | let end_mark = "!" |
1169 | ||
1170 | let start_mark = function | |
1171 | | KFake -> "!F!" | |
ae4735db | 1172 | | KCocci -> "!S!" |
34e49164 C |
1173 | | KC -> "!A!" |
1174 | | KExpanded -> "!E!" | |
1175 | | KOrigin -> "" | |
1176 | ||
1177 | let print_all_tokens2 pr xs = | |
1178 | if !Flag_parsing_c.debug_unparsing | |
1179 | then | |
1180 | let current_kind = ref KOrigin in | |
ae4735db | 1181 | xs +> List.iter (fun t -> |
34e49164 | 1182 | let newkind = kind_of_token2 t in |
b1b2de81 | 1183 | if newkind =*= !current_kind |
34e49164 C |
1184 | then pr (str_of_token2 t) |
1185 | else begin | |
1186 | pr (end_mark); | |
1187 | pr (start_mark newkind); | |
1188 | pr (str_of_token2 t); | |
1189 | current_kind := newkind | |
1190 | end | |
1191 | ); | |
ae4735db | 1192 | else |
34e49164 | 1193 | xs +> List.iter (fun x -> pr (str_of_token2 x)) |
ae4735db | 1194 | |
34e49164 C |
1195 | |
1196 | ||
1197 | ||
1198 | (*****************************************************************************) | |
1199 | (* Entry points *) | |
1200 | (*****************************************************************************) | |
1201 | ||
1202 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
1203 | * complete C file, including a modified C file by transformation.ml, | |
1204 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
1205 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
1206 | * fancy stuff when a function was not modified at all. Just need to | |
1207 | * print the list of token as-is. But now pretty_print_c.ml handles | |
1208 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
ae4735db C |
1209 | * optimize a little the pretty printing. |
1210 | * | |
34e49164 C |
1211 | * update: now have PPviastr which goes even faster than PPviatok, so |
1212 | * PPviatok has disappeared. | |
1213 | *) | |
1214 | ||
1215 | type ppmethod = PPnormal | PPviastr | |
1216 | ||
1217 | ||
1218 | ||
1219 | ||
1220 | (* The pp_program function will call pretty_print_c.ml with a special | |
ae4735db | 1221 | * function to print the leaf components, the tokens. When we want to |
34e49164 | 1222 | * print a token, we need to print also maybe the space and comments that |
ae4735db | 1223 | * were close to it in the original file (and that was omitted during the |
34e49164 C |
1224 | * parsing phase), and honor what the cocci-info attached to the token says. |
1225 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
ae4735db | 1226 | * print it and also print some cocci-code attached in a PLUS to it. |
34e49164 C |
1227 | * So we will also maybe call unparse_cocci. Because the cocci-code may |
1228 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
1229 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
1230 | *) | |
1231 | ||
ae4735db C |
1232 | let pp_program2 xs outfile = |
1233 | Common.with_open_outfile outfile (fun (pr,chan) -> | |
1234 | let pr s = | |
1235 | if !Flag_parsing_c.debug_unparsing | |
34e49164 | 1236 | then begin pr2_no_nl s; flush stderr end |
ae4735db | 1237 | else pr s |
34e49164 C |
1238 | (* flush chan; *) |
1239 | (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) | |
1240 | in | |
ae4735db C |
1241 | |
1242 | xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> | |
34e49164 C |
1243 | (* here can still work on ast *) |
1244 | let e = remove_useless_fakeInfo_struct e in | |
ae4735db | 1245 | |
34e49164 | 1246 | match ppmethod with |
002099fc | 1247 | | PPnormal -> |
34e49164 | 1248 | (* now work on tokens *) |
34e49164 | 1249 | (* phase1: just get all the tokens, all the information *) |
ae4735db | 1250 | assert(toks_e +> List.for_all (fun t -> |
34e49164 C |
1251 | TH.is_origin t or TH.is_expanded t |
1252 | )); | |
1253 | let toks = get_fakeInfo_and_tokens e toks_e in | |
1254 | let toks = displace_fake_nodes toks in | |
1255 | (* assert Origin;ExpandedTok;Faketok *) | |
1256 | let toks = expand_mcode toks in | |
c491d8ee | 1257 | |
34e49164 C |
1258 | (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok) |
1259 | * and no tag information, just NOTHING. *) | |
fc1ad971 C |
1260 | |
1261 | let toks = | |
1262 | if !Flag.sgrep_mode2 | |
5626f154 C |
1263 | then |
1264 | (* nothing else to do for sgrep *) | |
8babbc8f | 1265 | drop_expanded(drop_fake(drop_minus toks)) |
fc1ad971 C |
1266 | else |
1267 | (* phase2: can now start to filter and adjust *) | |
65038c61 | 1268 | (let (toks,tu) = adjust_indentation toks in |
fc1ad971 | 1269 | let toks = adjust_before_semicolon toks in(*before remove minus*) |
8babbc8f | 1270 | let toks = adjust_after_paren toks in(*also before remove minus*) |
aba5c457 C |
1271 | let toks = drop_space_at_endline toks in |
1272 | let toks = paren_to_space toks in | |
c491d8ee | 1273 | let toks = drop_end_comma toks in |
fc1ad971 C |
1274 | let toks = remove_minus_and_between_and_expanded_and_fake toks in |
1275 | (* assert Origin + Cocci + C and no minus *) | |
1276 | let toks = add_space toks in | |
5636bb2c | 1277 | let toks = add_newlines toks tu in |
65038c61 | 1278 | let toks = paren_then_brace toks in |
fc1ad971 | 1279 | let toks = fix_tokens toks in |
65038c61 | 1280 | toks) in |
34e49164 | 1281 | |
ae4735db | 1282 | (* in theory here could reparse and rework the ast! or |
34e49164 C |
1283 | * apply some SP. Not before cos julia may have generated |
1284 | * not parsable file. Need do unparsing_tricks call before being | |
1285 | * ready to reparse. *) | |
1286 | print_all_tokens2 pr toks; | |
1287 | ||
1288 | | PPviastr -> pr str | |
1289 | ) | |
1290 | ) | |
1291 | ||
ae4735db | 1292 | let pp_program a b = |
34e49164 C |
1293 | Common.profile_code "C unparsing" (fun () -> pp_program2 a b) |
1294 | ||
485bce71 | 1295 | |
ae4735db | 1296 | let pp_program_default xs outfile = |
485bce71 C |
1297 | let xs' = xs +> List.map (fun x -> x, PPnormal) in |
1298 | pp_program xs' outfile |