Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau, Julia Lawall |
ae4735db | 2 | * |
17ba0788 C |
3 | * Copyright (C) 2012, INRIA. |
4 | * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA. | |
0708f913 | 5 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU |
34e49164 C |
6 | * |
7 | * This program is free software; you can redistribute it and/or | |
8 | * modify it under the terms of the GNU General Public License (GPL) | |
9 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 10 | * |
34e49164 C |
11 | * This program is distributed in the hope that it will be useful, |
12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | * file license.txt for more details. | |
ae4735db C |
15 | * |
16 | * | |
485bce71 | 17 | * Modifications by Julia Lawall for better newline handling. |
34e49164 C |
18 | *) |
19 | open Common | |
20 | ||
34e49164 C |
21 | module TH = Token_helpers |
22 | ||
b1b2de81 C |
23 | (* should keep comments and directives in between adjacent deleted terms, |
24 | but not comments and directives within deleted terms. should use the | |
25 | labels found in the control-flow graph *) | |
26 | ||
34e49164 C |
27 | |
28 | ||
708f4980 C |
29 | (*****************************************************************************) |
30 | (* Wrappers *) | |
31 | (*****************************************************************************) | |
abad11c5 | 32 | let pr2, pr2_once = mk_pr2_wrappers Flag_parsing_c.verbose_unparsing |
708f4980 | 33 | |
34e49164 C |
34 | (*****************************************************************************) |
35 | (* Types used during the intermediate phases of the unparsing *) | |
36 | (*****************************************************************************) | |
37 | ||
ae4735db | 38 | type token1 = |
abad11c5 | 39 | | Fake1 of Ast_c.info |
34e49164 C |
40 | | T1 of Parser_c.token |
41 | ||
42 | (* The cocci_tag of the token should always be a NOTHING. The mark of | |
43 | * the token can only be OriginTok or ExpandedTok. Why not get rid of | |
44 | * token and get something simpler ? because we need to know if the | |
45 | * info is a TCommentCpp or TCommentSpace, etc for some of the further | |
46 | * analysis so easier to keep with the token. | |
ae4735db | 47 | * |
34e49164 C |
48 | * This type contains the whole information. Have all the tokens with this |
49 | * type. | |
50 | *) | |
708f4980 | 51 | type min = |
abad11c5 C |
52 | | Min of (int list (* match numbers from witness trees *) * |
53 | Ast_cocci.adjacency (* adjacency information *)) | |
708f4980 C |
54 | | Ctx |
55 | ||
ae4735db | 56 | type token2 = |
abad11c5 C |
57 | | T2 of Parser_c.token * min |
58 | * int option (* orig index, abstracting away comments and space *) | |
59 | * Unparse_cocci.nlhint option | |
8babbc8f | 60 | | Fake2 of min |
708f4980 | 61 | | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) |
abad11c5 | 62 | * Unparse_cocci.nlhint option |
34e49164 | 63 | | C2 of string |
c491d8ee | 64 | | Comma of string |
34e49164 | 65 | | Indent_cocci2 |
c3e37e97 | 66 | | Unindent_cocci2 of bool (* true for permanent, false for temporary *) |
993936c0 | 67 | | EatSpace2 |
34e49164 C |
68 | |
69 | (* not used yet *) | |
ae4735db | 70 | type token3 = |
34e49164 C |
71 | | T3 of Parser_c.token |
72 | | Cocci3 of string | |
73 | | C3 of string | |
74 | ||
75 | ||
76 | (* similar to the tech in parsing_hack *) | |
abad11c5 C |
77 | type token_extended = |
78 | { tok2 : token2; | |
79 | str : string; | |
80 | idx : int option; (* to know if 2 tokens were consecutive in orig file *) | |
81 | mutable new_tokens_before : token2 list; | |
82 | mutable remove : bool; | |
83 | } | |
34e49164 C |
84 | |
85 | ||
86 | (*****************************************************************************) | |
87 | (* Helpers *) | |
88 | (*****************************************************************************) | |
89 | ||
ae4735db | 90 | let info_of_token1 t = |
34e49164 C |
91 | match t with |
92 | | Fake1 info -> info | |
93 | | T1 tok -> TH.info_of_tok tok | |
94 | ||
002099fc | 95 | let print_token1 = function |
abad11c5 | 96 | | T1 tok -> TH.str_of_tok tok |
002099fc C |
97 | | Fake1 info -> "fake" |
98 | ||
34e49164 | 99 | let str_of_token2 = function |
abad11c5 C |
100 | | T2 (t,_,_,_) -> TH.str_of_tok t |
101 | | Cocci2 (s,_,_,_,_) | |
102 | | C2 s | |
c491d8ee | 103 | | Comma s -> s |
abad11c5 C |
104 | | Fake2 _ |
105 | | Indent_cocci2 | |
106 | | Unindent_cocci2 _ | |
993936c0 | 107 | | EatSpace2 -> "" |
34e49164 C |
108 | |
109 | let print_token2 = function | |
abad11c5 C |
110 | | T2 (t,b,_,_) -> |
111 | let t_str = | |
112 | match t with | |
113 | | Parser_c.TCommentSpace _ -> " sp " | |
114 | | Parser_c.TCommentNewline _ -> " nl " | |
115 | | Parser_c.TCommentCpp _ -> " cp " | |
116 | | Parser_c.TCommentMisc _ -> " misc " | |
117 | | Parser_c.TComment _ -> " comment " | |
118 | | _ -> "" in | |
119 | let b_str = | |
120 | match b with | |
121 | | Min (index,adj) -> | |
122 | Printf.sprintf "-.%d[%s]" | |
123 | (match adj with Ast_cocci.ADJ n -> n | _ -> -1) | |
124 | (String.concat " " (List.map string_of_int index)) | |
125 | | Ctx -> "" in | |
126 | "T2:"^b_str^t_str^TH.str_of_tok t | |
8babbc8f | 127 | | Fake2 b -> |
abad11c5 C |
128 | let b_str = |
129 | match b with | |
130 | | Min (index,adj) -> | |
131 | Printf.sprintf "-%d[%s]" | |
132 | (match adj with Ast_cocci.ADJ n -> n | _ -> -1) | |
133 | (String.concat " " (List.map string_of_int index)) | |
134 | | Ctx -> "" in | |
135 | b_str^"fake" | |
5636bb2c | 136 | | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s |
34e49164 | 137 | | C2 s -> "C2:"^s |
c491d8ee | 138 | | Comma s -> "Comma:"^s |
34e49164 | 139 | | Indent_cocci2 -> "Indent" |
c3e37e97 | 140 | | Unindent_cocci2 _ -> "Unindent" |
993936c0 | 141 | | EatSpace2 -> "EatSpace" |
34e49164 | 142 | |
abad11c5 C |
143 | (* |
144 | let simple_print_all_tokens pr l = | |
145 | List.iter (function x -> Printf.printf "|%s| " (pr x)) l; | |
34e49164 | 146 | Printf.printf "\n" |
abad11c5 | 147 | *) |
34e49164 C |
148 | |
149 | let str_of_token3 = function | |
150 | | T3 t -> TH.str_of_tok t | |
151 | | Cocci3 s | C3 s -> s | |
152 | ||
153 | ||
154 | ||
ae4735db C |
155 | let mk_token_extended x = |
156 | let origidx = | |
34e49164 | 157 | match x with |
abad11c5 C |
158 | | T2 (_,_,idx,_) -> idx |
159 | | _ -> None in | |
ae4735db | 160 | { tok2 = x; |
34e49164 C |
161 | str = str_of_token2 x; |
162 | idx = origidx; | |
163 | new_tokens_before = []; | |
164 | remove = false; | |
165 | } | |
166 | ||
ae4735db | 167 | let rebuild_tokens_extented toks_ext = |
34e49164 | 168 | let _tokens = ref [] in |
ae4735db | 169 | toks_ext +> List.iter (fun tok -> |
34e49164 C |
170 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); |
171 | if not tok.remove then push2 tok.tok2 _tokens; | |
abad11c5 | 172 | ); |
34e49164 C |
173 | let tokens = List.rev !_tokens in |
174 | (tokens +> List.map mk_token_extended) | |
175 | ||
176 | ||
177 | let mcode_contain_plus = function | |
178 | | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false | |
179 | | Ast_cocci.CONTEXT _ -> true | |
abad11c5 | 180 | (* patch: when need full coccinelle transformation *) |
8babbc8f C |
181 | | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false |
182 | | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) -> true(*REPL is not empty*) | |
abad11c5 | 183 | | Ast_cocci.PLUS _ -> raise (Impossible 132) |
34e49164 | 184 | |
ae4735db | 185 | let contain_plus info = |
34e49164 C |
186 | let mck = Ast_c.mcode_of_info info in |
187 | mcode_contain_plus mck | |
188 | ||
189 | (*****************************************************************************) | |
190 | (* Last fix on the ast *) | |
191 | (*****************************************************************************) | |
192 | ||
ae4735db | 193 | (* Because of the ugly trick to handle initialiser, I generate fake ',' |
34e49164 C |
194 | * for the last initializer element, but if there is nothing around it, |
195 | * I don't want in the end to print it. | |
196 | *) | |
197 | ||
ae4735db | 198 | let remove_useless_fakeInfo_struct program = |
34e49164 | 199 | let bigf = { Visitor_c.default_visitor_c_s with |
ae4735db | 200 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
34e49164 | 201 | match k ini with |
abad11c5 C |
202 | | Ast_c.InitList args, ii -> |
203 | (match ii with | |
204 | | [_;_] -> ini | |
205 | | i1 :: i2 :: iicommaopt :: tl when | |
206 | (not (contain_plus iicommaopt)) | |
207 | && (not (contain_plus i2)) | |
208 | && (Ast_c.is_fake iicommaopt) -> | |
209 | (* sometimes the guy put a normal iicommaopt *) | |
210 | Ast_c.InitList args, (i1 :: i2 :: tl) | |
211 | | ii -> Ast_c.InitList args, ii | |
212 | ) | |
213 | | x -> x) | |
214 | } in | |
34e49164 C |
215 | Visitor_c.vk_toplevel_s bigf program |
216 | ||
217 | ||
218 | (*****************************************************************************) | |
219 | (* Tokens1 generation *) | |
220 | (*****************************************************************************) | |
221 | ||
ae4735db | 222 | let get_fakeInfo_and_tokens celem toks = |
1b9ae606 | 223 | |
ae4735db | 224 | let toks_in = ref toks in |
34e49164 C |
225 | let toks_out = ref [] in |
226 | ||
227 | (* todo? verify good order of position ? *) | |
ae4735db | 228 | let pr_elem info = |
34e49164 | 229 | match Ast_c.pinfo_of_info info with |
abad11c5 C |
230 | | Ast_c.FakeTok _ -> |
231 | push2 (Fake1 info) toks_out | |
232 | | Ast_c.OriginTok _ | Ast_c.ExpandedTok _ -> | |
1b9ae606 | 233 | |
abad11c5 C |
234 | (* get the associated comments/space/cppcomment tokens *) |
235 | let (before, x, after) = | |
236 | !toks_in +> split_when (fun tok -> | |
237 | info =*= TH.info_of_tok tok) | |
238 | in | |
239 | assert(info =*= TH.info_of_tok x); | |
240 | (*old: assert(before +> List.for_all (TH.is_comment)); *) | |
241 | before +> List.iter (fun x -> | |
242 | if not (TH.is_comment x) | |
243 | then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) | |
244 | (* case such as int asm d3("x"); not yet in ast *) | |
34e49164 | 245 | ); |
abad11c5 | 246 | before +> List.iter (fun x -> push2 (T1 x) toks_out); |
1b9ae606 | 247 | |
abad11c5 C |
248 | push2 (T1 x) toks_out; |
249 | toks_in := after; | |
250 | | Ast_c.AbstractLineTok _ -> | |
251 | (* can be called on type info when for instance use -type_c *) | |
252 | if !Flag_parsing_c.pretty_print_type_info | |
253 | then push2 (Fake1 info) toks_out | |
254 | else raise (Impossible 134) (* at this stage *) | |
34e49164 C |
255 | in |
256 | ||
257 | let pr_space _ = () in (* use the spacing that is there already *) | |
258 | ||
259 | Pretty_print_c.pp_program_gen pr_elem pr_space celem; | |
260 | ||
261 | if not (null !toks_in) | |
0708f913 | 262 | then failwith "WEIRD: unparsing not finished"; |
34e49164 C |
263 | |
264 | List.rev !toks_out | |
265 | ||
8babbc8f | 266 | (* Fake nodes that have BEFORE code or are - should be moved over any subsequent |
34e49164 | 267 | whitespace and newlines, but not any comments, to get as close to the affected |
002099fc | 268 | code as possible. Similarly, fake nodes that have AFTER code should be moved |
34e49164 C |
269 | backwards. No fake nodes should have both before and after code. *) |
270 | ||
271 | let displace_fake_nodes toks = | |
272 | let is_fake = function Fake1 _ -> true | _ -> false in | |
273 | let is_whitespace = function | |
abad11c5 C |
274 | | T1(Parser_c.TCommentSpace _) |
275 | (* patch: cocci *) | |
34e49164 C |
276 | | T1(Parser_c.TCommentNewline _) -> true |
277 | | _ -> false in | |
278 | let rec loop toks = | |
279 | let fake_info = | |
abad11c5 | 280 | try Some (split_when is_fake toks) |
34e49164 C |
281 | with Not_found -> None in |
282 | match fake_info with | |
abad11c5 C |
283 | | Some(bef,((Fake1 info) as fake),aft) -> |
284 | (match !(info.Ast_c.cocci_tag) with | |
285 | | Some x -> | |
286 | (match x with | |
287 | | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_) | |
288 | (* for , replacement is more likely to be like after, but not clear... | |
289 | but treating it as after breaks a lot of tests. *) | |
290 | ||
291 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) -> | |
292 | (* move the fake node forwards *) | |
293 | let (whitespace,rest) = span is_whitespace aft in | |
294 | bef @ whitespace @ fake :: (loop rest) | |
295 | ||
296 | | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) -> | |
297 | (* move the fake node backwards *) | |
298 | let revbef = List.rev bef in | |
299 | let (revwhitespace,revprev) = span is_whitespace revbef in | |
300 | let whitespace = List.rev revwhitespace in | |
301 | let prev = List.rev revprev in | |
302 | prev @ fake :: (loop (whitespace @ aft)) | |
303 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) -> | |
304 | failwith "fake node should not be before-after" | |
305 | | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) | |
306 | | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) | |
708f4980 | 307 | ) |
abad11c5 C |
308 | | None -> |
309 | bef @ fake :: (loop aft) | |
310 | ) | |
34e49164 | 311 | | None -> toks |
abad11c5 | 312 | | _ -> raise (Impossible 135) in |
34e49164 C |
313 | loop toks |
314 | ||
315 | (*****************************************************************************) | |
316 | (* Tokens2 generation *) | |
317 | (*****************************************************************************) | |
318 | ||
b1b2de81 | 319 | let comment2t2 = function |
abad11c5 C |
320 | | (Token_c.TCommentCpp |
321 | (* not sure iif the following list is exhaustive or complete *) | |
322 | (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError), | |
323 | (info : Token_c.info)) -> | |
324 | C2(info.Common.str) | |
5636bb2c | 325 | | (Token_c.TCommentCpp x,(info : Token_c.info)) -> |
abad11c5 C |
326 | C2("\n"^info.Common.str^"\n") |
327 | | x -> failwith (Printf.sprintf "unexpected comment %s" (Dumper.dump x)) | |
b1b2de81 | 328 | |
ae4735db | 329 | let expand_mcode toks = |
34e49164 C |
330 | let toks_out = ref [] in |
331 | ||
332 | let index = ref 0 in | |
333 | ||
ae4735db | 334 | let add_elem t minus = |
34e49164 | 335 | match t with |
ae4735db | 336 | | Fake1 info -> |
abad11c5 C |
337 | let str = Ast_c.str_of_info info in |
338 | let isminus = match minus with Min _ -> true | Ctx -> false in | |
339 | (* don't add fake string if the thing should be removed *) | |
340 | if str =$= "" or isminus | |
341 | then push2 (Fake2 minus) toks_out | |
342 | (* fx the fake "," at the end of a structure or enum. | |
343 | no idea what other fake info there can be... *) | |
344 | else push2 (Comma str) toks_out | |
ae4735db | 345 | |
002099fc | 346 | | T1 tok -> |
abad11c5 C |
347 | (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) |
348 | (* no tag on expandedTok ! *) | |
349 | let modified = function | |
350 | | None -> false | |
351 | | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false | |
352 | | _ -> true in | |
353 | (if TH.is_expanded tok && | |
354 | modified !((TH.info_of_tok tok).Ast_c.cocci_tag) | |
355 | (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*) | |
356 | then | |
357 | failwith | |
358 | (Printf.sprintf | |
359 | "expanded token %s on line %d is either modified or stored in a metavariable" | |
360 | (TH.str_of_tok tok) (TH.line_of_tok tok))); | |
361 | ||
362 | let tok' = tok +> TH.visitor_info_of_tok (fun i -> | |
363 | { i with Ast_c.cocci_tag = ref Ast_c.emptyAnnot; } | |
364 | ) in | |
365 | ||
366 | let optindex = | |
367 | if TH.is_origin tok && not (TH.is_real_comment tok) | |
368 | then | |
369 | begin | |
370 | incr index; | |
371 | Some !index | |
34e49164 | 372 | end |
abad11c5 C |
373 | else None |
374 | in | |
34e49164 | 375 | |
abad11c5 | 376 | push2 (T2 (tok', minus, optindex, None)) toks_out |
34e49164 C |
377 | in |
378 | ||
ae4735db C |
379 | let expand_info t = |
380 | let (mcode,env) = | |
abad11c5 | 381 | Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).Ast_c.cocci_tag) in |
34e49164 | 382 | |
5636bb2c | 383 | let pr_cocci s ln col rcol hint = |
abad11c5 | 384 | push2 (Cocci2 (s,ln,col,rcol,hint)) toks_out in |
ae4735db | 385 | let pr_c info = |
b1b2de81 | 386 | (match Ast_c.pinfo_of_info info with |
abad11c5 C |
387 | | Ast_c.AbstractLineTok _ -> |
388 | push2 (C2 (Ast_c.str_of_info info)) toks_out | |
b1b2de81 | 389 | | Ast_c.FakeTok (s,_) -> |
abad11c5 | 390 | push2 (C2 s) toks_out |
34e49164 | 391 | | _ -> |
abad11c5 C |
392 | Printf.fprintf stderr "line: %s\n" (Dumper.dump info); |
393 | failwith "not an abstract line" | |
394 | ); | |
b1b2de81 | 395 | (!(info.Ast_c.comments_tag)).Ast_c.mafter +> |
abad11c5 | 396 | List.iter (fun x -> push2 (comment2t2 x) toks_out) in |
b1b2de81 | 397 | |
708f4980 | 398 | let pr_barrier ln col = (* marks a position, used around C code *) |
abad11c5 | 399 | push2 (Cocci2 ("",ln,col,col,None)) toks_out in |
708f4980 | 400 | let pr_nobarrier ln col = () in (* not needed for linux spacing *) |
b1b2de81 | 401 | |
708f4980 | 402 | let pr_cspace _ = push2 (C2 " ") toks_out in |
34e49164 | 403 | |
708f4980 C |
404 | let pr_space _ = () (* rely on add_space in cocci code *) in |
405 | let pr_arity _ = () (* not interested *) in | |
34e49164 | 406 | |
abad11c5 | 407 | let indent _ = push2 Indent_cocci2 toks_out in |
c3e37e97 | 408 | let unindent x = push2 (Unindent_cocci2 x) toks_out in |
abad11c5 | 409 | let eat_space _ = push2 EatSpace2 toks_out in |
34e49164 | 410 | |
708f4980 C |
411 | let args_pp = |
412 | (env, pr_cocci, pr_c, pr_cspace, | |
abad11c5 C |
413 | (match !Flag_parsing_c.spacing with |
414 | | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace), | |
415 | pr_arity, | |
416 | (match !Flag_parsing_c.spacing with | |
417 | | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), | |
418 | indent, unindent, eat_space) in | |
419 | ||
420 | (* old: when for yacfe with partial cocci: | |
421 | * add_elem t false; | |
422 | *) | |
34e49164 | 423 | |
abad11c5 | 424 | (* patch: when need full coccinelle transformation *) |
faf9a90c | 425 | let unparser = Unparse_cocci.pp_list_list_any args_pp false in |
34e49164 | 426 | match mcode with |
ae4735db | 427 | | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> |
abad11c5 C |
428 | (* Why adding ? because I want to have all the information, the whole |
429 | * set of tokens, so I can then process and remove the | |
430 | * is_between_two_minus for instance *) | |
431 | add_elem t (Min (inst,adj)); | |
432 | (match any_xxs with | |
433 | | Ast_cocci.NOREPLACEMENT -> () | |
434 | | Ast_cocci.REPLACEMENT(any_xxs,_) -> | |
435 | unparser any_xxs Unparse_cocci.InPlace | |
436 | ) | |
ae4735db | 437 | | Ast_cocci.CONTEXT (_,any_befaft) -> |
abad11c5 C |
438 | (match any_befaft with |
439 | | Ast_cocci.NOTHING -> | |
440 | add_elem t Ctx | |
441 | | Ast_cocci.BEFORE (xxs,_) -> | |
442 | unparser xxs Unparse_cocci.Before; | |
443 | add_elem t Ctx | |
444 | | Ast_cocci.AFTER (xxs,_) -> | |
445 | add_elem t Ctx; | |
446 | unparser xxs Unparse_cocci.After; | |
447 | | Ast_cocci.BEFOREAFTER (xxs, yys, _) -> | |
448 | unparser xxs Unparse_cocci.Before; | |
449 | add_elem t Ctx; | |
450 | unparser yys Unparse_cocci.After; | |
451 | ) | |
452 | | Ast_cocci.PLUS _ -> raise (Impossible 136) | |
34e49164 C |
453 | in |
454 | ||
455 | toks +> List.iter expand_info; | |
456 | List.rev !toks_out | |
ae4735db | 457 | |
34e49164 C |
458 | |
459 | (*****************************************************************************) | |
460 | (* Tokens2 processing, filtering, adjusting *) | |
461 | (*****************************************************************************) | |
462 | ||
002099fc | 463 | let is_space = function |
abad11c5 | 464 | | T2(Parser_c.TCommentSpace _,_b,_i,_h) -> true (* only whitespace *) |
ae4735db | 465 | | _ -> false |
fc1ad971 C |
466 | |
467 | let is_newline = function | |
abad11c5 | 468 | | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> true |
fc1ad971 C |
469 | | _ -> false |
470 | ||
abad11c5 C |
471 | let is_whitespace x = |
472 | is_space x or is_newline x | |
002099fc | 473 | |
34e49164 | 474 | let is_minusable_comment = function |
abad11c5 C |
475 | | (T2 (t,_b,_i,_h)) -> |
476 | (match t with | |
477 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
478 | (* patch: coccinelle *) | |
479 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true | |
480 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false | |
481 | | Parser_c.TComment _ | |
482 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
483 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) | |
484 | | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _) | |
485 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> (* result was false *) | |
486 | true | |
487 | (* | |
488 | | Parser_c.TCommentMisc _ | |
489 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) -> | |
490 | false | |
491 | *) | |
492 | | _ -> false | |
493 | ) | |
ae4735db | 494 | | _ -> false |
34e49164 | 495 | |
fc1ad971 | 496 | let is_minusable_comment_nocpp = function |
abad11c5 C |
497 | | (T2 (t,_b,_i,_h)) -> |
498 | (match t with | |
499 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
500 | (* patch: coccinelle *) | |
501 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true | |
502 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false | |
503 | | Parser_c.TComment _ -> true | |
504 | (* | |
505 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
506 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) | |
507 | | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _) | |
508 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> | |
509 | false | |
510 | ||
511 | | Parser_c.TCommentMisc _ | |
512 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) -> | |
513 | false | |
514 | *) | |
515 | | _ -> false | |
516 | ) | |
ae4735db | 517 | | _ -> false |
fc1ad971 | 518 | |
34e49164 | 519 | let all_coccis = function |
abad11c5 C |
520 | | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 |
521 | | Unindent_cocci2 _ | EatSpace2 -> true | |
34e49164 C |
522 | | _ -> false |
523 | ||
abad11c5 C |
524 | (* previously gave up if the first character was a newline, but not clear why *) |
525 | let is_minusable_comment_or_plus x = | |
526 | is_minusable_comment x or all_coccis x | |
34e49164 | 527 | |
708f4980 | 528 | let set_minus_comment adj = function |
abad11c5 C |
529 | | T2 (t,Ctx,idx,hint) -> |
530 | let str = TH.str_of_tok t in | |
531 | (match t with | |
532 | | Parser_c.TCommentSpace _ | |
533 | (* patch: coccinelle *) | |
534 | | Parser_c.TCommentNewline _ -> () | |
535 | ||
536 | | Parser_c.TComment _ | |
537 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
538 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) | |
539 | | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _) | |
540 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> | |
541 | pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" | |
542 | (TH.line_of_tok t) str) | |
543 | | _ -> raise (Impossible 137) | |
544 | ); | |
545 | T2 (t, Min adj, idx, hint) | |
546 | (* patch: coccinelle *) | |
547 | | T2 (t, Min adj, idx, hint) as x -> x | |
8babbc8f | 548 | | Fake2 _ as x -> x |
abad11c5 | 549 | | _ -> raise (Impossible 138) |
34e49164 | 550 | |
feec80c3 | 551 | (* don't touch ifdefs, done after *) |
708f4980 | 552 | let set_minus_comment_or_plus adj = function |
abad11c5 C |
553 | | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 |
554 | | Unindent_cocci2 _ | EatSpace2 as x -> x | |
708f4980 | 555 | | x -> set_minus_comment adj x |
34e49164 | 556 | |
abad11c5 C |
557 | let is_minus = function |
558 | | T2 (_, Min _, _, _) -> true | |
559 | | _ -> false | |
560 | ||
fc1ad971 | 561 | let drop_minus xs = |
abad11c5 | 562 | xs +> exclude is_minus |
fc1ad971 | 563 | |
8babbc8f | 564 | let drop_expanded xs = |
abad11c5 C |
565 | xs +> exclude (function |
566 | | T2 (t,_,_,_) when TH.is_expanded t -> true | |
8babbc8f C |
567 | | _ -> false |
568 | ) | |
569 | ||
570 | let drop_fake xs = | |
abad11c5 | 571 | xs +> exclude (function |
8babbc8f | 572 | | Fake2 _ -> true |
34e49164 C |
573 | | _ -> false |
574 | ) | |
5626f154 C |
575 | |
576 | let remove_minus_and_between_and_expanded_and_fake xs = | |
577 | ||
8babbc8f C |
578 | (* get rid of expanded tok *) |
579 | let xs = drop_expanded xs in | |
34e49164 | 580 | |
abad11c5 C |
581 | let minus_or_comment x = |
582 | is_minus x or is_minusable_comment x in | |
113803cf | 583 | |
abad11c5 C |
584 | let minus_or_comment_nocpp x = |
585 | is_minus x or is_minusable_comment_nocpp x in | |
fc1ad971 C |
586 | |
587 | let common_adj (index1,adj1) (index2,adj2) = | |
8babbc8f C |
588 | let same_adj = (* same adjacency info *) |
589 | match (adj1,adj2) with | |
abad11c5 | 590 | | (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2 |
8babbc8f C |
591 | | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in |
592 | same_adj && | |
fc1ad971 | 593 | (* non-empty intersection of witness trees *) |
abad11c5 | 594 | not ((inter_set index1 index2) = []) in |
fc1ad971 | 595 | |
fc1ad971 | 596 | (* new idea: collects regions not containing non-space context code |
abad11c5 C |
597 | if two adjacent adjacent minus tokens satisfy common_adj then delete |
598 | all spaces, comments etc between them | |
599 | if two adjacent minus tokens do not satisfy common_adj only delete | |
600 | the spaces between them if there are no comments, etc. | |
601 | if the region contain no plus code and is both preceded and followed | |
602 | by a newline, delete the initial newline. *) | |
fc1ad971 C |
603 | |
604 | let rec adjust_around_minus = function | |
abad11c5 C |
605 | | [] -> [] |
606 | | (T2(Parser_c.TCommentNewline c,_b,_i,_h) as x):: | |
607 | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs -> | |
608 | let (minus_list,rest) = span not_context (t1::xs) in | |
609 | let contains_plus = List.exists is_plus minus_list in | |
610 | let x = | |
611 | match List.rev minus_list with | |
612 | | (T2(Parser_c.TCommentNewline c,_b,_i,_h))::rest | |
613 | when List.for_all minus_or_comment minus_list -> | |
614 | set_minus_comment_or_plus adj1 x | |
615 | | _ -> x in | |
616 | x :: adjust_within_minus contains_plus minus_list | |
617 | @ adjust_around_minus rest | |
618 | | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs -> | |
619 | let (minus_list,rest) = span not_context (t1::xs) in | |
620 | let contains_plus = List.exists is_plus minus_list in | |
621 | adjust_within_minus contains_plus minus_list | |
622 | @ adjust_around_minus rest | |
c491d8ee | 623 | | x::xs -> |
abad11c5 | 624 | x :: adjust_around_minus xs |
fc1ad971 | 625 | and adjust_within_minus cp (* contains plus *) = function |
abad11c5 C |
626 | | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs -> |
627 | let not_minus = function T2(_,Min _,_,_) -> false | _ -> true in | |
628 | let (not_minus_list,rest) = span not_minus xs in | |
629 | t1 :: | |
630 | (match rest with | |
631 | | ((Fake2(Min adj2) | T2(_,Min adj2,_,_)) as t2)::xs -> | |
632 | if common_adj adj1 adj2 | |
633 | || not cp && List.for_all is_whitespace not_minus_list | |
634 | then | |
635 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) | |
636 | @ (adjust_within_minus cp (t2::xs)) | |
637 | else | |
638 | not_minus_list | |
639 | @ (adjust_within_minus cp (t2::xs)) | |
640 | | _ -> | |
641 | if cp | |
642 | then xs | |
643 | else | |
644 | (* remove spaces after removed stuff, eg a comma after a | |
645 | function argument *) | |
646 | (let (spaces,rest) = span is_space xs in | |
647 | (List.map (set_minus_comment_or_plus adj1) spaces) | |
648 | @ rest) | |
649 | ) | |
fc1ad971 C |
650 | | xs -> failwith "should always start with minus" |
651 | and not_context = function | |
abad11c5 | 652 | | (T2(_,Ctx,_,_) as x) when not (is_minusable_comment x) -> false |
fc1ad971 C |
653 | | _ -> true |
654 | and is_plus = function | |
abad11c5 | 655 | | C2 _ | Comma _ | Cocci2 _ -> true |
fc1ad971 | 656 | | _ -> false in |
34e49164 | 657 | |
fc1ad971 | 658 | let xs = adjust_around_minus xs in |
34e49164 | 659 | |
8babbc8f C |
660 | (* get rid of fake tok *) |
661 | let xs = drop_fake xs in | |
662 | ||
113803cf | 663 | (* this drops blank lines after a brace introduced by removing code *) |
fc1ad971 | 664 | let minus_or_comment_nonl = function |
abad11c5 C |
665 | | T2(_,Min adj,_,_) -> true |
666 | | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> false | |
fc1ad971 C |
667 | | x -> is_minusable_comment x in |
668 | ||
113803cf | 669 | let rec adjust_after_brace = function |
abad11c5 C |
670 | | [] -> [] |
671 | | ((T2(_,Ctx,_,_)) as x)::((T2(_,Min adj,_,_)::_) as xs) | |
672 | when str_of_token2 x =$= "{" -> | |
673 | let (between_minus,rest) = span minus_or_comment_nonl xs in | |
674 | let (newlines,rest) = span is_whitespace rest in | |
675 | let (drop_newlines,last_newline) = | |
676 | let rec loop = function | |
677 | | [] -> ([],[]) | |
678 | | ((T2(Parser_c.TCommentNewline _,_b,_i,_h)) as x) :: rest -> | |
679 | (List.rev rest,[x]) | |
680 | | x::xs -> | |
681 | let (drop_newlines,last_newline) = loop xs in | |
682 | (drop_newlines,x::last_newline) in | |
683 | loop (List.rev newlines) in | |
684 | x :: between_minus | |
685 | @ List.map (set_minus_comment adj) drop_newlines | |
686 | @ last_newline | |
687 | @ adjust_after_brace rest | |
688 | | x::xs -> x :: (adjust_after_brace xs) in | |
113803cf C |
689 | |
690 | let xs = adjust_after_brace xs in | |
691 | ||
fc1ad971 | 692 | (* search backwards from context } over spaces until reaching a newline. |
abad11c5 C |
693 | then go back over all minus code until reaching some context or + code. |
694 | get rid of all intervening spaces, newlines, and comments | |
695 | input is reversed *) | |
fc1ad971 | 696 | let rec adjust_before_brace = function |
abad11c5 C |
697 | | [] -> [] |
698 | | ((T2(t,Ctx,_,_)) as x)::xs | |
699 | when str_of_token2 x =$= "}" or is_newline x -> | |
700 | let (outer_spaces,rest) = span is_space xs in | |
701 | x :: outer_spaces @ | |
702 | (match rest with | |
703 | | ((T2 (Parser_c.TCommentNewline _,Ctx,_i,_h)) as h) :: | |
704 | (* the rest of this code is the same as from_newline below | |
705 | but merging them seems to be error prone... *) | |
706 | ((T2 (t, Min adj, idx, hint)) as m) :: rest -> | |
707 | let (spaces,rest) = span minus_or_comment_nocpp rest in | |
708 | h :: m :: | |
709 | (List.map (set_minus_comment adj) spaces) @ | |
710 | (adjust_before_brace rest) | |
711 | | _ -> adjust_before_brace rest | |
712 | ) | |
fc1ad971 C |
713 | | x::xs -> x :: (adjust_before_brace xs) in |
714 | ||
715 | let from_newline = function | |
abad11c5 C |
716 | | ((T2 (t, Min adj, idx, hint)) as m) :: rest -> |
717 | let (spaces,rest) = span minus_or_comment_nocpp rest in | |
718 | m :: | |
719 | (List.map (set_minus_comment adj) spaces) @ | |
720 | (adjust_before_brace rest) | |
721 | | ((T2 (t0,Ctx, idx0,h0)) as m0) :: | |
722 | ((T2 (t,Min adj,idx,h)) as m) :: rest | |
723 | when TH.str_of_tok t0 = "" -> | |
724 | (* This is for the case of a #define that is completely deleted, | |
725 | because a #define has a strange EOL token at the end. | |
726 | We hope there i no other kind of token that is represented by | |
727 | "", but it seems like changing the kind of token might break | |
728 | the end of entity recognition in the C parser. | |
729 | See parsing_hacks.ml *) | |
730 | let (spaces,rest) = span minus_or_comment_nocpp rest in | |
731 | m0 :: m :: | |
732 | (List.map (set_minus_comment adj) spaces) @ | |
733 | (adjust_before_brace rest) | |
fc1ad971 C |
734 | | rest -> adjust_before_brace rest in |
735 | ||
736 | let xs = List.rev (from_newline (List.rev xs)) in | |
34e49164 | 737 | |
d6ce1786 | 738 | let cleanup_ifdefs toks = |
abad11c5 | 739 | (* TODO: these functions are horrid, but using tokens caused circularity *) |
d6ce1786 | 740 | let is_ifdef = function |
d6ce1786 | 741 | | T2((Parser_c.TCommentCpp |
abad11c5 C |
742 | (Token_c.CppIfDirective Token_c.IfDef, _)),m,idx,_) -> true |
743 | | T2((Parser_c.TCommentCpp | |
744 | (Token_c.CppIfDirective Token_c.IfDef0, _)),m,idx,_) -> true | |
d6ce1786 C |
745 | | t -> false in |
746 | let is_else = function | |
abad11c5 C |
747 | | T2((Parser_c.TCommentCpp |
748 | (Token_c.CppIfDirective Token_c.Else, _)),m,idx,_) -> true | |
d6ce1786 C |
749 | | _ -> false in |
750 | let is_endif = function | |
abad11c5 C |
751 | | T2((Parser_c.TCommentCpp |
752 | (Token_c.CppIfDirective Token_c.Endif, _)),m,idx,_) -> true | |
d6ce1786 C |
753 | | _ -> false in |
754 | let add t = function | |
abad11c5 | 755 | | l::rest -> (t::l)::rest |
d6ce1786 C |
756 | | _ -> failwith "not possible" in |
757 | let rec parse_ifdef acc_keywords acc_code stack = function | |
abad11c5 | 758 | | [] -> (None,acc_keywords,acc_code) |
d6ce1786 | 759 | | t::rest when is_else t -> |
abad11c5 C |
760 | (match stack with |
761 | | [] -> parse_ifdef (t::acc_keywords) ([]::acc_code) stack rest | |
762 | | _ -> parse_ifdef acc_keywords (add t acc_code) stack rest | |
763 | ) | |
d6ce1786 | 764 | | t::rest when is_endif t -> |
abad11c5 C |
765 | (match stack with |
766 | | [] -> ((Some (t,rest)),acc_keywords,acc_code) | |
767 | | _::stack -> parse_ifdef acc_keywords (add t acc_code) stack rest | |
768 | ) | |
d6ce1786 | 769 | | t::rest when is_ifdef t -> |
abad11c5 | 770 | parse_ifdef acc_keywords (add t acc_code) (()::stack) rest |
d6ce1786 C |
771 | | t::rest -> parse_ifdef acc_keywords (add t acc_code) stack rest in |
772 | let unminus = function | |
abad11c5 | 773 | | T2 (t,Min adj,idx,hint) -> T2 (t,Ctx,idx,hint) |
d6ce1786 | 774 | | x -> x in |
d6ce1786 | 775 | let rec loop = function |
abad11c5 | 776 | | [] -> [] |
d6ce1786 | 777 | | t::rest when is_ifdef t -> |
abad11c5 C |
778 | let (ender,acc_keywords,acc_code) = |
779 | parse_ifdef [t] [[]] [] rest in | |
780 | let acc_code = List.map loop acc_code in | |
781 | let merge = (* args reversed *) | |
782 | List.fold_left2 | |
783 | (fun prev kwd code -> kwd :: (List.rev code) @ prev) | |
784 | [] in | |
785 | (match ender with | |
786 | | None -> merge (List.map unminus acc_keywords) acc_code | |
787 | | Some(endif,rest) -> | |
788 | let rest = loop rest in | |
789 | if List.for_all is_minus (endif :: acc_keywords) | |
790 | then (merge acc_keywords acc_code) @ (endif :: rest) | |
791 | else | |
792 | (merge (List.map unminus acc_keywords) acc_code) @ | |
793 | ((unminus endif) :: rest) | |
794 | ) | |
d6ce1786 C |
795 | | x::xs -> x :: loop xs in |
796 | loop toks in | |
abad11c5 | 797 | |
d6ce1786 C |
798 | let xs = cleanup_ifdefs xs in |
799 | let xs = drop_minus xs in | |
800 | xs | |
feec80c3 | 801 | |
993936c0 | 802 | (* things that should not be followed by space - boundary between SmPL |
abad11c5 | 803 | code and C code *) |
993936c0 C |
804 | let adjust_eat_space toks = |
805 | let rec loop = function | |
abad11c5 | 806 | | [] -> [] |
993936c0 C |
807 | | EatSpace2 :: x :: rest when is_space x -> loop rest |
808 | | EatSpace2 :: rest -> loop rest | |
809 | | x :: xs -> x :: loop xs in | |
810 | loop toks | |
811 | ||
faf9a90c C |
812 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
813 | let adjust_before_semicolon toks = | |
814 | let toks = List.rev toks in | |
002099fc | 815 | let rec search_semic = function |
abad11c5 C |
816 | | [] -> [] |
817 | | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs | |
818 | when List.mem (str_of_token2 x) [";";")";","] -> | |
819 | x :: search_semic (search_minus false xs) | |
002099fc C |
820 | | x::xs -> x :: search_semic xs |
821 | and search_minus seen_minus xs = | |
abad11c5 | 822 | let (spaces, rest) = span is_space xs in |
002099fc C |
823 | (* only delete spaces if something is actually deleted *) |
824 | match rest with | |
abad11c5 | 825 | | ((T2(_,Min _,_,_)) as a)::rerest -> a :: search_minus true rerest |
002099fc C |
826 | | _ -> if seen_minus then rest else xs in |
827 | List.rev (search_semic toks) | |
34e49164 | 828 | |
8babbc8f C |
829 | (* normally, in C code, a ( is not followed by a space or newline *) |
830 | let adjust_after_paren toks = | |
831 | let rec search_paren = function | |
abad11c5 C |
832 | | [] -> [] |
833 | | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs | |
834 | when List.mem (str_of_token2 x) ["("] (* other things? *) -> | |
835 | x :: search_paren (search_minus false xs) | |
8babbc8f C |
836 | | x::xs -> x :: search_paren xs |
837 | and search_minus seen_minus xs = | |
abad11c5 | 838 | let (spaces, rest) = span is_whitespace xs in |
8babbc8f C |
839 | (* only delete spaces if something is actually deleted *) |
840 | match rest with | |
abad11c5 C |
841 | | ((T2(_,Min _,_,_)) as a)::rerest -> (* minus *) |
842 | a :: search_minus true rerest | |
843 | | ((T2(_,Ctx,_,_)) as a)::rerest | |
844 | when seen_minus && str_of_token2 a = "," -> | |
845 | (* comma after ( will be deleted, so consider it as minus code | |
846 | already *) | |
847 | a :: search_minus true rerest | |
8babbc8f C |
848 | | _ -> if seen_minus then rest else xs in (* drop trailing space *) |
849 | search_paren toks | |
850 | ||
65038c61 C |
851 | (* this is for the case where braces are added around an if branch *) |
852 | let paren_then_brace toks = | |
853 | let rec search_paren = function | |
abad11c5 C |
854 | | [] -> [] |
855 | | ((T2(_,Ctx,_,_)) as x)::xs | |
856 | when List.mem (str_of_token2 x) [")"] -> | |
857 | x :: search_paren (search_plus xs) | |
65038c61 C |
858 | | x::xs -> x :: search_paren xs |
859 | and search_plus xs = | |
abad11c5 C |
860 | let (spaces, rest) = span is_space xs in |
861 | let (nls, rest) = span is_newline rest in | |
65038c61 | 862 | match rest with |
abad11c5 C |
863 | (* move the brace up to the previous line *) |
864 | | ((Cocci2("{",_,_,_,_)) as x) :: (((Cocci2 _) :: _) as rest) -> | |
865 | spaces @ x :: nls @ rest | |
65038c61 C |
866 | | _ -> xs in |
867 | search_paren toks | |
868 | ||
abad11c5 | 869 | let is_ident_like s = s ==~ regexp_alpha |
34e49164 | 870 | |
aba5c457 | 871 | let rec drop_space_at_endline = function |
abad11c5 | 872 | | [] -> [] |
aba5c457 | 873 | | [x] -> [x] |
190f1acf | 874 | | (C2 " ") :: |
abad11c5 C |
875 | ((((T2(Parser_c.TCommentSpace _,Ctx,_,_)) | Cocci2("\n",_,_,_,_) | |
876 | (T2(Parser_c.TCommentNewline _,Ctx,_,_))) :: _) as rest) -> | |
877 | (* when unparse_cocci doesn't know whether space is needed *) | |
878 | drop_space_at_endline rest | |
879 | | ((T2(Parser_c.TCommentSpace _,Ctx,_i,_h)) as a)::rest -> | |
880 | let (outer_spaces,rest) = span is_space rest in | |
881 | let minus_or_comment_or_space_nocpp = function | |
882 | | T2(_,Min adj,_,_) -> true | |
883 | | (T2(Parser_c.TCommentSpace _,Ctx,_i,_)) -> true | |
884 | | (T2(Parser_c.TCommentNewline _,Ctx,_i,_)) -> false | |
885 | | x -> false in | |
886 | let (minus,rest) = span minus_or_comment_or_space_nocpp rest in | |
887 | let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in | |
888 | if List.exists is_minus minus | |
889 | then | |
890 | match rest with | |
891 | | ((T2(Parser_c.TCommentNewline _,Ctx,_i,_h)) as a)::rest -> | |
892 | (* drop trailing spaces *) | |
893 | minus @ a :: (drop_space_at_endline rest) | |
894 | | _ -> fail () | |
895 | else fail () | |
8f657093 | 896 | | a :: rest -> |
abad11c5 | 897 | a :: drop_space_at_endline rest |
aba5c457 C |
898 | |
899 | (* if a removed ( is between two tokens, then add a space *) | |
900 | let rec paren_to_space = function | |
abad11c5 | 901 | | [] -> [] |
aba5c457 C |
902 | | [x] -> [x] |
903 | | [x;y] -> [x;y] | |
abad11c5 C |
904 | | ((T2(_,Ctx,_,_)) as a):: |
905 | ((T2(t,Min _,_,_)) as b):: | |
906 | ((T2(_,Ctx,_,_)) as c)::rest | |
aba5c457 | 907 | when not (is_whitespace a) && TH.str_of_tok t = "(" -> |
abad11c5 | 908 | a :: b :: (C2 " ") :: (paren_to_space (c :: rest)) |
aba5c457 C |
909 | | a :: rest -> a :: (paren_to_space rest) |
910 | ||
ae4735db | 911 | let rec add_space xs = |
34e49164 C |
912 | match xs with |
913 | | [] -> [] | |
914 | | [x] -> [x] | |
5636bb2c | 915 | | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
708f4980 | 916 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && |
abad11c5 C |
917 | not (lnx = -1) && not (rcolx = -1) && lnx = lny && rcolx < lcoly -> |
918 | (* this only works within a line. could consider whether | |
919 | something should be done to add newlines too, rather than | |
920 | printing them explicitly in unparse_cocci. *) | |
921 | x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) | |
65038c61 C |
922 | | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
923 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && | |
abad11c5 C |
924 | not (lnx = -1) && not (rcolx = -1) && lnx < lny -> |
925 | (* this only works within a line. could consider whether | |
926 | something should be done to add newlines too, rather than | |
927 | printing them explicitly in unparse_cocci. *) | |
928 | x::C2 (String.make (lny-lnx) '\n'):: | |
929 | C2 (String.make (lcoly-1) ' '):: (* -1 is for the + *) | |
930 | add_space (y::xs) | |
931 | | ((T2(_,Ctx,_,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *) | |
932 | let sx = str_of_token2 x in | |
933 | let sy = str_of_token2 y in | |
934 | if is_ident_like sx && (is_ident_like sy or List.mem sy ["="]) | |
935 | then x::C2 " "::(add_space (y::xs)) | |
936 | else x::(add_space (y::xs)) | |
1eddfd50 | 937 | | x::y::xs -> (* not boundary, not sure if it is possible *) |
abad11c5 C |
938 | let sx = str_of_token2 x in |
939 | let sy = str_of_token2 y in | |
940 | if is_ident_like sx && is_ident_like sy | |
941 | then x::C2 " "::(add_space (y::xs)) | |
942 | else x::(add_space (y::xs)) | |
34e49164 | 943 | |
c491d8ee C |
944 | (* A fake comma is added at the end of an unordered initlist or a enum |
945 | decl, if the initlist or enum doesn't already end in a comma. This is only | |
946 | needed if there is + code, ie if we see Cocci after it in the code sequence *) | |
947 | ||
948 | let rec drop_end_comma = function | |
abad11c5 | 949 | | [] -> [] |
c491d8ee C |
950 | | [x] -> [x] |
951 | | ((Comma ",") as x) :: rest -> | |
abad11c5 C |
952 | let (newlines,rest2) = span is_whitespace rest in |
953 | (match rest2 with | |
954 | | (Cocci2 _) :: _ -> x :: drop_end_comma rest | |
955 | | _ -> drop_end_comma rest | |
956 | ) | |
c491d8ee C |
957 | | x :: xs -> x :: drop_end_comma xs |
958 | ||
5636bb2c C |
959 | (* The following only works for the outermost function call. Stack records |
960 | the column of all open parentheses. Space_cell contains the most recent | |
961 | comma in the outermost function call. The goal is to decide whether this | |
962 | should be followed by a space or a newline and indent. *) | |
963 | let add_newlines toks tabbing_unit = | |
abad11c5 C |
964 | (* the following is for strings that may contain newline or tabs *) |
965 | let string_length s count = | |
966 | let l = list_of_string s in | |
967 | List.fold_left | |
968 | (function count -> | |
969 | function | |
970 | | '\t' -> count + 8 | |
971 | | '\n' -> 0 | |
972 | | c -> count + 1) | |
973 | count l in | |
5636bb2c C |
974 | let create_indent n = |
975 | let (tu,tlen) = | |
976 | match tabbing_unit with | |
abad11c5 | 977 | | Some ("\t",_) -> ("\t",8) |
5636bb2c | 978 | | Some ("",_) -> ("\t",8) (* not sure why... *) |
abad11c5 | 979 | | Some (s,_) -> (s,string_length s 0) (* assuming only tabs or spaces *) |
5636bb2c C |
980 | | None -> ("\t",8) in |
981 | let rec loop seen = | |
982 | if seen + tlen <= n | |
983 | then tu ^ loop (seen + tlen) | |
984 | else String.make (n-seen) ' ' in | |
985 | loop 0 in | |
986 | let check_for_newline count x = function | |
abad11c5 C |
987 | | Some (start,space_cell) when count > Flag_parsing_c.max_width -> |
988 | space_cell := "\n"^(create_indent x); | |
989 | Some (x + (count - start)) | |
5636bb2c | 990 | | _ -> None in |
abad11c5 C |
991 | let start_box stack space_cell count s = |
992 | let count = string_length s count in | |
993 | (count,count::stack,space_cell) in | |
994 | let end_box stack space_cell count s = | |
995 | (* this assumes that start_box and end_box are matched, but this is not | |
996 | necessarily the case, if ( is modified and ) is context code *) | |
997 | let count = string_length s count in | |
998 | match stack with | |
999 | | [x] -> | |
1000 | (match check_for_newline count x space_cell with | |
1001 | | Some count -> (count,[],None) | |
1002 | | None -> (count,[],None) | |
1003 | ) | |
1004 | | [] -> (count,stack,space_cell) | |
1005 | | _ -> (count,List.tl stack,space_cell) in | |
1006 | let rec loop ((stack,space_cell) as info) count = function | |
1007 | | [] -> [] | |
1008 | | ((Cocci2(s,line,lcol,rcol,hint)) as a):: (* hint can't be start *) | |
1009 | (T2(commatok,Ctx,idx,_)):: | |
1010 | (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,_,_)) :: | |
1011 | (((T2(codetok,Ctx,_,_)) :: _) as xs) | |
1012 | when | |
1013 | (TH.str_of_tok commatok) = "," && | |
1014 | (TH.str_of_tok sptok) = " " && | |
1015 | ((List.length stack = 1) or (* not super elegant... *) | |
1016 | (* check if the Cocci2 token is a ), need double treatment *) | |
1017 | (List.length stack = 2) && (hint = Some Unparse_cocci.EndBox)) -> | |
1018 | (* something large added before a comma *) | |
1019 | let stack = (* do the work of end_box in the length 2 case *) | |
1020 | if List.length stack = 2 | |
1021 | then List.tl stack | |
1022 | else stack in | |
1023 | let x = List.hd stack in | |
1024 | let sp = ref " " in | |
1025 | let cocci_count = string_length s count in | |
1026 | let space_cell = Some (cocci_count+1,sp) in (* count before space *) | |
1027 | let newcount = cocci_count + 2 in (* count incuding space *) | |
1028 | let future_count = | |
1029 | string_length (TH.str_of_tok codetok) newcount in | |
1030 | let b = T2(commatok,Ctx,idx, | |
1031 | Some (Unparse_cocci.SpaceOrNewline sp)) in | |
1032 | (match check_for_newline future_count x space_cell with | |
1033 | | Some count -> a :: b :: loop (stack,Some (x,sp)) count xs | |
1034 | | None -> a :: b :: loop (stack,Some (newcount,sp)) newcount xs | |
1035 | ) | |
1036 | | (T2(commatok,Ctx,_,_)):: | |
1037 | (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,idx,_)) :: | |
1038 | (((Cocci2(s,line,lcol,rcol,hint))::_) as xs) | |
1039 | when (TH.str_of_tok commatok) = "," && (TH.str_of_tok sptok) = " " && | |
1040 | List.length stack = 1 (* not super elegant... *) -> | |
1041 | (* something large added after a comma *) | |
1042 | let x = List.hd stack in | |
1043 | let sp = ref " " in | |
1044 | let space_cell = Some (count+1,sp) in (* count before space *) | |
1045 | let newcount = count + 2 in (* count incuding space *) | |
1046 | let future_count = string_length s newcount in | |
1047 | let a = T2(commatok,Ctx,idx, | |
1048 | Some (Unparse_cocci.SpaceOrNewline sp)) in | |
1049 | (match check_for_newline future_count x space_cell with | |
1050 | | Some count -> a :: loop (stack,Some (x,sp)) count xs | |
1051 | | None -> a :: loop (stack,Some (newcount,sp)) newcount xs | |
1052 | ) | |
1053 | | ((T2(tok,Ctx,idx,_)) as a)::xs -> | |
1054 | (* let (stack,space_cell) = info in *) | |
1055 | (match TH.str_of_tok tok with | |
1056 | | "=" as s -> | |
1057 | let (spaces,rest) = span is_space xs in | |
1058 | (match rest with | |
1059 | | ((T2(tok,Ctx,_,_)) as b)::ixs -> | |
1060 | (match TH.str_of_tok tok with | |
1061 | | "{" -> | |
1062 | let newcount = | |
1063 | List.fold_left | |
1064 | (function prev -> | |
1065 | function | |
1066 | | (T2(tok,_b,_i,_h)) -> | |
1067 | string_length (TH.str_of_tok tok) prev | |
1068 | | _ -> failwith "not possible") | |
1069 | count spaces in | |
1070 | let front = a :: spaces @ [b] in | |
1071 | let (newcount,newstack,newspacecell) = | |
1072 | start_box stack space_cell newcount "{" in | |
1073 | front @ loop (newstack,newspacecell) newcount ixs | |
1074 | | s -> a :: loop info (string_length s count) xs | |
1075 | ) | |
1076 | | _ -> a :: loop info (string_length s count) xs | |
1077 | ) | |
1078 | | "(" as s -> | |
1079 | let (newcount,newstack,newspacecell) = | |
1080 | start_box stack space_cell count s in | |
1081 | a :: loop (newstack,newspacecell) newcount xs | |
1082 | | ")" as s -> | |
1083 | let (newcount,newstack,newspacecell) = | |
1084 | end_box stack space_cell count s in | |
1085 | a :: loop (newstack,newspacecell) newcount xs | |
1086 | | "{" as s when not (stack = []) -> | |
1087 | (* [] case means statement braces *) | |
1088 | let (newcount,newstack,newspacecell) = | |
1089 | start_box stack space_cell count s in | |
1090 | a :: loop (newstack,newspacecell) newcount xs | |
1091 | | "}" as s when not (stack = []) -> | |
1092 | (* [] case means statement braces *) | |
1093 | let (newcount,newstack,newspacecell) = | |
1094 | end_box stack space_cell count s in | |
1095 | a :: loop (newstack,newspacecell) newcount xs | |
1096 | | s -> a :: loop info (string_length s count) xs | |
1097 | ) | |
5636bb2c | 1098 | | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs -> |
abad11c5 C |
1099 | let (stack,space_cell) = info in |
1100 | let rest = | |
1101 | match hint with | |
1102 | | None -> loop info (string_length s count) xs | |
1103 | | Some Unparse_cocci.StartBox -> | |
1104 | let (newcount,newstack,newspacecell) = | |
1105 | start_box stack space_cell count s in | |
1106 | loop (newstack,newspacecell) newcount xs | |
1107 | | Some Unparse_cocci.EndBox -> | |
1108 | let (newcount,newstack,newspacecell) = | |
1109 | end_box stack space_cell count s in | |
1110 | loop (newstack,newspacecell) newcount xs | |
1111 | | Some (Unparse_cocci.SpaceOrNewline sp) -> | |
1112 | let count = string_length s (count + 1 (*space*)) in | |
1113 | (match stack with | |
1114 | | [x] -> | |
1115 | (match check_for_newline count x space_cell with | |
1116 | | Some count -> loop (stack,Some (x,sp)) count xs | |
1117 | | None -> loop (stack,Some (count,sp)) count xs | |
1118 | ) | |
1119 | | _ -> loop info count xs | |
1120 | ) in | |
1121 | a :: rest | |
1122 | | ((T2(tok,_,_,_)) as a)::xs -> | |
1123 | a :: loop info (string_length (TH.str_of_tok tok) count) xs | |
5636bb2c | 1124 | | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs |
c491d8ee | 1125 | | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs |
8babbc8f | 1126 | | Fake2 _ :: _ | Indent_cocci2 :: _ |
993936c0 | 1127 | | Unindent_cocci2 _::_ | EatSpace2::_ -> |
abad11c5 | 1128 | failwith "unexpected fake, indent, unindent, or eatspace" in |
5636bb2c | 1129 | let redo_spaces prev = function |
abad11c5 C |
1130 | | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) -> |
1131 | C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev | |
1132 | | T2(tok,min,idx,Some (Unparse_cocci.SpaceOrNewline sp)) -> | |
1133 | C2 !sp :: T2(tok,min,idx,None) :: prev | |
5636bb2c C |
1134 | | t -> t::prev in |
1135 | (match !Flag_parsing_c.spacing with | |
abad11c5 C |
1136 | | Flag_parsing_c.SMPL -> toks |
1137 | | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks)) | |
1138 | ) | |
34e49164 C |
1139 | |
1140 | (* When insert some new code, because of a + in a SP, we must add this | |
abad11c5 C |
1141 | * code at the right place, with the good indentation. So each time we |
1142 | * encounter some spacing info, with some newline, we maintain the | |
1143 | * current indentation level used. | |
1144 | * | |
1145 | * TODO problems: not accurate. ex: TODO | |
1146 | * | |
1147 | * TODO: if in #define region, should add a \ \n | |
1148 | *) | |
ae4735db | 1149 | let new_tabbing2 space = |
abad11c5 | 1150 | list_of_string space |
34e49164 | 1151 | +> List.rev |
abad11c5 | 1152 | +> take_until (fun c -> c =<= '\n') |
34e49164 C |
1153 | +> List.rev |
1154 | +> List.map string_of_char | |
1155 | +> String.concat "" | |
1156 | ||
ae4735db | 1157 | let new_tabbing a = |
abad11c5 | 1158 | profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) |
34e49164 C |
1159 | |
1160 | ||
ae4735db | 1161 | let rec adjust_indentation xs = |
aa721442 | 1162 | |
65038c61 | 1163 | let _current_tabbing = ref ([] : string list) in |
34e49164 C |
1164 | let tabbing_unit = ref None in |
1165 | ||
1166 | let string_of_list l = String.concat "" (List.map string_of_char l) in | |
1167 | ||
1168 | (* try to pick a tabbing unit for the plus code *) | |
1169 | let adjust_tabbing_unit old_tab new_tab = | |
b1b2de81 | 1170 | if !tabbing_unit =*= None && String.length new_tab > String.length old_tab |
34e49164 C |
1171 | then |
1172 | let old_tab = list_of_string old_tab in | |
1173 | let new_tab = list_of_string new_tab in | |
1174 | let rec loop = function | |
abad11c5 C |
1175 | | ([],new_tab) -> |
1176 | tabbing_unit := Some(string_of_list new_tab,List.rev new_tab) | |
1177 | | (_,[]) -> failwith "not possible" | |
1178 | | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *) | |
34e49164 C |
1179 | loop (old_tab,new_tab) in |
1180 | ||
65038c61 | 1181 | (* |
abad11c5 C |
1182 | let remtab tu current_tab = |
1183 | let current_tab = List.rev(list_of_string current_tab) in | |
1184 | let rec loop = function | |
1185 | ([],new_tab) -> string_of_list (List.rev new_tab) | |
1186 | | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-) | |
1187 | "" | |
1188 | | (t::ts,n::ns) when t =<= n -> loop (ts,ns) | |
1189 | | (_,ns) -> (-* mismatch; remove what we can *-) | |
1190 | string_of_list (List.rev ns) in | |
1191 | loop (tu,current_tab) in | |
65038c61 | 1192 | *) |
34e49164 C |
1193 | |
1194 | let rec find_first_tab started = function | |
abad11c5 C |
1195 | | [] -> () |
1196 | | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" -> | |
1197 | find_first_tab true xs | |
1198 | (* patch: coccinelle *) | |
1199 | | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::_ | |
34e49164 | 1200 | when started -> |
abad11c5 C |
1201 | let s = str_of_token2 x +> new_tabbing in |
1202 | tabbing_unit := Some (s,List.rev (list_of_string s)) | |
34e49164 C |
1203 | | x::xs -> find_first_tab started xs in |
1204 | find_first_tab false xs; | |
1205 | ||
1eddfd50 | 1206 | let rec balanced ct = function |
abad11c5 C |
1207 | | [] -> ct >= 0 |
1208 | | ((T2(tok,_,_,_)) as x)::xs -> | |
1209 | (match str_of_token2 x with | |
1210 | | "(" -> balanced (ct+1) xs | |
1211 | | ")" -> balanced (ct-1) xs | |
1212 | | _ -> balanced ct xs | |
1213 | ) | |
1eddfd50 C |
1214 | | x::xs -> balanced ct xs in |
1215 | ||
97111a47 C |
1216 | let update_tabbing started s x = |
1217 | let old_tabbing = !_current_tabbing in | |
1218 | str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]); | |
1219 | (* only trust the indentation after the first { *) | |
1220 | if started | |
1221 | then | |
1222 | adjust_tabbing_unit | |
abad11c5 C |
1223 | (String.concat "" old_tabbing) |
1224 | (String.concat "" !_current_tabbing) in | |
97111a47 | 1225 | |
ae4735db | 1226 | let rec aux started xs = |
34e49164 | 1227 | match xs with |
abad11c5 C |
1228 | | [] -> [] |
1229 | (* patch: coccinelle *) | |
1230 | | ((T2 (Parser_c.TCommentNewline s,_,_,_)) as x):: | |
97111a47 | 1231 | Unindent_cocci2(false)::xs -> |
abad11c5 C |
1232 | update_tabbing started s x; |
1233 | (C2 "\n")::aux started xs | |
8f657093 | 1234 | | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs -> |
abad11c5 C |
1235 | (C2 "\n")::aux started xs |
1236 | | ((T2 (tok,_,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _, _)):: | |
5636bb2c | 1237 | ((Cocci2 ("{",_,_,_,_)) as a)::xs |
708f4980 | 1238 | when started && str_of_token2 x =$= ")" -> |
abad11c5 C |
1239 | (* to be done for if, etc, but not for a function header *) |
1240 | x::(C2 " ")::a::(aux started xs) | |
1241 | | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::xs | |
97111a47 | 1242 | when |
abad11c5 C |
1243 | balanced 0 (fst(span (function x -> not(is_newline x)) xs)) -> |
1244 | update_tabbing started s x; | |
1245 | let coccis_rest = span all_coccis xs in | |
1246 | (match coccis_rest with | |
1247 | | (_::_,((T2 (tok,_,_,_)) as y)::_) when str_of_token2 y =$= "}" -> | |
1248 | (* the case where cocci code has been added before a close } *) | |
1249 | x::aux started (Indent_cocci2::xs) | |
1250 | | _ -> x::aux started xs | |
1251 | ) | |
65038c61 C |
1252 | | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
1253 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL -> | |
abad11c5 C |
1254 | let tu = String.make (lcoly-1) ' ' in |
1255 | _current_tabbing := tu::(!_current_tabbing); | |
1256 | C2 (tu)::aux started (y::xs) | |
34e49164 | 1257 | | Indent_cocci2::xs -> |
abad11c5 C |
1258 | (match !tabbing_unit with |
1259 | | None -> aux started xs | |
1260 | | Some (tu,_) -> | |
1261 | _current_tabbing := tu::(!_current_tabbing); | |
1262 | (* can't be C2, for later phases *) | |
1263 | Cocci2 (tu,-1,-1,-1,None)::aux started xs | |
1264 | ) | |
1265 | | Unindent_cocci2(permanent)::((Cocci2("\n",_,_,_,_)) as x)::xs -> | |
1266 | (* seems only relevant if there is a following cocci newline *) | |
1267 | (match !_current_tabbing with | |
1268 | | [] -> aux started xs | |
1269 | | _::new_tabbing -> | |
1270 | let s = String.concat "" new_tabbing in | |
1271 | _current_tabbing := new_tabbing; | |
1272 | x::Cocci2 (s,-1,-1,-1,None)::aux started xs | |
1273 | ) | |
1274 | | Unindent_cocci2(permanent)::xs -> aux started xs | |
34e49164 | 1275 | (* border between existing code and cocci code *) |
abad11c5 | 1276 | | ((T2 (tok,_,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs |
b1b2de81 | 1277 | when str_of_token2 x =$= "{" -> |
abad11c5 C |
1278 | x::aux true (y::Indent_cocci2::xs) |
1279 | | ((Cocci2 _) as x)::((T2 (tok,_,_,_)) as y)::xs | |
b1b2de81 | 1280 | when str_of_token2 y =$= "}" -> |
abad11c5 | 1281 | x::aux started (Unindent_cocci2 true::y::xs) |
34e49164 | 1282 | (* starting the body of the function *) |
abad11c5 C |
1283 | | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" -> |
1284 | x::aux true xs | |
5636bb2c | 1285 | | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs |
5636bb2c | 1286 | | ((Cocci2("\n",_,_,_,_)) as x)::xs -> |
abad11c5 C |
1287 | (* dont inline in expr because of weird eval order of ocaml *) |
1288 | let s = String.concat "" !_current_tabbing in | |
1289 | (* can't be C2, for later phases *) | |
1290 | x::Cocci2 (s,-1,-1,-1,None)::aux started xs | |
34e49164 | 1291 | | x::xs -> x::aux started xs in |
5636bb2c | 1292 | (aux false xs,!tabbing_unit) |
34e49164 C |
1293 | |
1294 | ||
1295 | let rec find_paren_comma = function | |
1296 | | [] -> () | |
1297 | ||
1298 | (* do nothing if was like this in original file *) | |
abad11c5 C |
1299 | | { str = "("; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs) |
1300 | | { str = ","; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs) | |
1301 | | { str = ","; idx = Some p1 } :: ({ str = ")"; idx = Some p2} :: _ as xs) | |
1302 | when p2 =|= p1 + 1 -> | |
1303 | find_paren_comma xs | |
34e49164 C |
1304 | |
1305 | (* otherwise yes can adjust *) | |
abad11c5 C |
1306 | | { str = "(" } :: (({ str = ","} as rem) :: _ as xs) |
1307 | | ({ str = "," } as rem) :: ({ str = ","} :: _ as xs) | |
1308 | | ({ str = "," } as rem) :: ({ str = ")"} :: _ as xs) -> | |
1309 | rem.remove <- true; | |
1310 | find_paren_comma xs | |
34e49164 | 1311 | |
ae4735db | 1312 | | x::xs -> |
abad11c5 | 1313 | find_paren_comma xs |
34e49164 | 1314 | |
ae4735db C |
1315 | |
1316 | let fix_tokens toks = | |
34e49164 C |
1317 | let toks = toks +> List.map mk_token_extended in |
1318 | ||
abad11c5 C |
1319 | let cleaner = toks +> exclude (function |
1320 | | {tok2 = T2 (t,_,_,_)} -> TH.is_real_comment t (* I want the ifdef *) | |
34e49164 C |
1321 | | _ -> false |
1322 | ) in | |
1323 | find_paren_comma cleaner; | |
1324 | ||
1325 | let toks = rebuild_tokens_extented toks in | |
1326 | toks +> List.map (fun x -> x.tok2) | |
1327 | ||
1b9ae606 C |
1328 | (* if we have to remove a '}' that is alone on a line, remove the line too *) |
1329 | let drop_line toks = | |
1330 | let rec space_until_newline toks = | |
1331 | match toks with | |
1332 | | (T2(_, Min _, _, _) as hd) :: tl -> | |
1333 | let (drop, tl) = space_until_newline tl in | |
1334 | (drop, hd :: tl) | |
1335 | | hd :: tl when is_space hd -> | |
1336 | space_until_newline tl | |
1337 | | Fake2 _ :: tl -> | |
1338 | space_until_newline tl | |
1339 | | hd :: tl when is_newline hd -> | |
1340 | (true, toks) | |
1341 | | _ -> | |
1342 | (false, toks) in | |
1343 | let rec loop toks = | |
1344 | match toks with | |
1345 | | (T2(_, Min _, _, _) as x) :: tl | |
1346 | when str_of_token2 x =$= "}" -> | |
1347 | let (drop, tl) = space_until_newline tl in | |
1348 | (drop, x :: tl) | |
1349 | | hd :: tl when is_whitespace hd -> | |
1350 | let (drop, tl) = loop tl in | |
1351 | if drop then | |
1352 | (true, tl) | |
1353 | else | |
1354 | (false, toks) | |
1355 | | _ -> (false, toks) in | |
1356 | let rec find toks = | |
1357 | let (_, toks) = loop toks in | |
1358 | match toks with | |
1359 | | [] -> [] | |
1360 | | hd :: tl -> | |
1361 | hd :: find tl in | |
1362 | find toks | |
34e49164 | 1363 | |
34e49164 C |
1364 | (*****************************************************************************) |
1365 | (* Final unparsing (and debugging support) *) | |
1366 | (*****************************************************************************) | |
1367 | ||
1368 | (* for debugging *) | |
1369 | type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin | |
1370 | ||
1371 | let kind_of_token2 = function | |
8babbc8f | 1372 | | Fake2 _ -> KFake |
34e49164 C |
1373 | | Cocci2 _ -> KCocci |
1374 | | C2 _ -> KC | |
c491d8ee | 1375 | | Comma _ -> KC |
abad11c5 C |
1376 | | T2 (t,_,_,_) -> |
1377 | (match TH.pinfo_of_tok t with | |
1378 | | Ast_c.ExpandedTok _ -> KExpanded | |
1379 | | Ast_c.OriginTok _ -> KOrigin | |
1380 | | Ast_c.FakeTok _ -> raise (Impossible 139) (* now a Fake2 *) | |
1381 | | Ast_c.AbstractLineTok _ -> raise (Impossible 140) (* now a KC *) | |
1382 | ) | |
1383 | | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise (Impossible 141) | |
ae4735db | 1384 | |
34e49164 C |
1385 | let end_mark = "!" |
1386 | ||
1387 | let start_mark = function | |
1388 | | KFake -> "!F!" | |
ae4735db | 1389 | | KCocci -> "!S!" |
34e49164 C |
1390 | | KC -> "!A!" |
1391 | | KExpanded -> "!E!" | |
1392 | | KOrigin -> "" | |
1393 | ||
1394 | let print_all_tokens2 pr xs = | |
1395 | if !Flag_parsing_c.debug_unparsing | |
1396 | then | |
1397 | let current_kind = ref KOrigin in | |
ae4735db | 1398 | xs +> List.iter (fun t -> |
34e49164 | 1399 | let newkind = kind_of_token2 t in |
b1b2de81 | 1400 | if newkind =*= !current_kind |
34e49164 | 1401 | then pr (str_of_token2 t) |
abad11c5 C |
1402 | else |
1403 | begin | |
1404 | pr (end_mark); | |
1405 | pr (start_mark newkind); | |
1406 | pr (str_of_token2 t); | |
1407 | current_kind := newkind | |
1408 | end | |
34e49164 | 1409 | ); |
ae4735db | 1410 | else |
abad11c5 C |
1411 | let to_whitespace s = |
1412 | let r = String.copy s in | |
1413 | for i = 1 to String.length r do | |
1414 | let c = String.get r (i-1) in | |
1415 | match c with | |
1416 | | ' ' | '\t' | '\r' | '\n' -> () | |
1417 | | _ -> String.set r (i-1) ' ' | |
1418 | done; | |
1419 | r in | |
1420 | let hiding_level = ref 0 in | |
1421 | let handle_token t = | |
1422 | let s = str_of_token2 t in | |
1423 | let hide_current = | |
1424 | match t with | |
1425 | | T2 (t,_,_,_) -> | |
1426 | let i = TH.info_of_tok t in | |
1427 | (match Ast_c.get_annot_info i Token_annot.Exclude_start with | |
1428 | | None -> () | |
1429 | | Some _ -> hiding_level := !hiding_level + 1 | |
1430 | ); | |
1431 | let hide_current = !hiding_level > 0 in | |
1432 | (match Ast_c.get_annot_info i Token_annot.Exclude_end with | |
1433 | | None -> () | |
1434 | | Some _ -> hiding_level := max (!hiding_level - 1) 0 | |
1435 | ); | |
1436 | hide_current | |
1437 | | _ -> !hiding_level > 0 in | |
1438 | if hide_current then to_whitespace s else s in | |
1439 | xs +> List.iter (fun x -> pr (handle_token x)) | |
1440 | ||
ae4735db | 1441 | |
34e49164 C |
1442 | |
1443 | ||
1444 | ||
1445 | (*****************************************************************************) | |
1446 | (* Entry points *) | |
1447 | (*****************************************************************************) | |
1448 | ||
1449 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
1450 | * complete C file, including a modified C file by transformation.ml, | |
1451 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
1452 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
1453 | * fancy stuff when a function was not modified at all. Just need to | |
1454 | * print the list of token as-is. But now pretty_print_c.ml handles | |
1455 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
ae4735db C |
1456 | * optimize a little the pretty printing. |
1457 | * | |
34e49164 C |
1458 | * update: now have PPviastr which goes even faster than PPviatok, so |
1459 | * PPviatok has disappeared. | |
1460 | *) | |
1461 | ||
1462 | type ppmethod = PPnormal | PPviastr | |
1463 | ||
1464 | ||
1465 | ||
1466 | ||
1467 | (* The pp_program function will call pretty_print_c.ml with a special | |
ae4735db | 1468 | * function to print the leaf components, the tokens. When we want to |
34e49164 | 1469 | * print a token, we need to print also maybe the space and comments that |
ae4735db | 1470 | * were close to it in the original file (and that was omitted during the |
34e49164 C |
1471 | * parsing phase), and honor what the cocci-info attached to the token says. |
1472 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
ae4735db | 1473 | * print it and also print some cocci-code attached in a PLUS to it. |
34e49164 C |
1474 | * So we will also maybe call unparse_cocci. Because the cocci-code may |
1475 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
1476 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
1477 | *) | |
1478 | ||
ae4735db | 1479 | let pp_program2 xs outfile = |
abad11c5 | 1480 | with_open_outfile outfile (fun (pr,chan) -> |
ae4735db C |
1481 | let pr s = |
1482 | if !Flag_parsing_c.debug_unparsing | |
34e49164 | 1483 | then begin pr2_no_nl s; flush stderr end |
ae4735db | 1484 | else pr s |
abad11c5 C |
1485 | (* flush chan; *) |
1486 | (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) | |
34e49164 | 1487 | in |
ae4735db C |
1488 | |
1489 | xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> | |
34e49164 C |
1490 | (* here can still work on ast *) |
1491 | let e = remove_useless_fakeInfo_struct e in | |
ae4735db | 1492 | |
34e49164 | 1493 | match ppmethod with |
002099fc | 1494 | | PPnormal -> |
abad11c5 C |
1495 | (* now work on tokens *) |
1496 | (* phase1: just get all the tokens, all the information *) | |
1497 | assert(toks_e +> List.for_all (fun t -> | |
1498 | TH.is_origin t or TH.is_expanded t | |
1499 | )); | |
1500 | let toks = get_fakeInfo_and_tokens e toks_e in | |
1501 | let toks = displace_fake_nodes toks in | |
1502 | (* assert Origin;ExpandedTok;Faketok *) | |
1503 | let toks = expand_mcode toks in | |
1504 | ||
1505 | (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok) | |
1506 | * and no tag information, just NOTHING. *) | |
1507 | ||
1508 | let toks = | |
1509 | if !Flag.sgrep_mode2 | |
1510 | then | |
1511 | (* nothing else to do for sgrep *) | |
1512 | drop_expanded(drop_fake(drop_minus toks)) | |
1513 | else | |
1514 | begin | |
fc1ad971 | 1515 | (* phase2: can now start to filter and adjust *) |
abad11c5 C |
1516 | let (toks,tu) = adjust_indentation toks in |
1517 | let toks = adjust_eat_space toks in | |
1518 | let toks = adjust_before_semicolon toks in(*before remove minus*) | |
1519 | let toks = adjust_after_paren toks in(*also before remove minus*) | |
1520 | let toks = drop_space_at_endline toks in | |
1521 | let toks = paren_to_space toks in | |
1522 | let toks = drop_end_comma toks in | |
1b9ae606 C |
1523 | |
1524 | let toks = drop_line toks in | |
1525 | ||
abad11c5 | 1526 | let toks = remove_minus_and_between_and_expanded_and_fake toks in |
fc1ad971 | 1527 | (* assert Origin + Cocci + C and no minus *) |
abad11c5 C |
1528 | let toks = add_space toks in |
1529 | let toks = add_newlines toks tu in | |
1530 | let toks = paren_then_brace toks in | |
fc1ad971 | 1531 | let toks = fix_tokens toks in |
abad11c5 C |
1532 | toks |
1533 | end in | |
34e49164 | 1534 | |
abad11c5 C |
1535 | (* in theory here could reparse and rework the ast! or |
1536 | * apply some SP. Not before cos julia may have generated | |
1537 | * not parsable file. Need do unparsing_tricks call before | |
1538 | * being ready to reparse. *) | |
1539 | print_all_tokens2 pr toks; | |
34e49164 C |
1540 | |
1541 | | PPviastr -> pr str | |
1542 | ) | |
1543 | ) | |
1544 | ||
ae4735db | 1545 | let pp_program a b = |
abad11c5 | 1546 | profile_code "C unparsing" (fun () -> pp_program2 a b) |
34e49164 | 1547 | |
485bce71 | 1548 | |
ae4735db | 1549 | let pp_program_default xs outfile = |
485bce71 C |
1550 | let xs' = xs +> List.map (fun x -> x, PPnormal) in |
1551 | pp_program xs' outfile |