Commit | Line | Data |
---|---|---|
0708f913 | 1 | (* Yoann Padioleau, Julia Lawall |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
0708f913 | 4 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU |
34e49164 C |
5 | * |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
34e49164 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
ae4735db C |
14 | * |
15 | * | |
485bce71 | 16 | * Modifications by Julia Lawall for better newline handling. |
34e49164 C |
17 | *) |
18 | open Common | |
19 | ||
20 | open Ast_c | |
21 | ||
22 | module TH = Token_helpers | |
23 | ||
24 | ||
b1b2de81 C |
25 | (* should keep comments and directives in between adjacent deleted terms, |
26 | but not comments and directives within deleted terms. should use the | |
27 | labels found in the control-flow graph *) | |
28 | ||
34e49164 C |
29 | |
30 | ||
708f4980 C |
31 | (*****************************************************************************) |
32 | (* Wrappers *) | |
33 | (*****************************************************************************) | |
34 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing | |
35 | ||
34e49164 C |
36 | (*****************************************************************************) |
37 | (* Types used during the intermediate phases of the unparsing *) | |
38 | (*****************************************************************************) | |
39 | ||
ae4735db | 40 | type token1 = |
34e49164 C |
41 | | Fake1 of info |
42 | | T1 of Parser_c.token | |
43 | ||
44 | (* The cocci_tag of the token should always be a NOTHING. The mark of | |
45 | * the token can only be OriginTok or ExpandedTok. Why not get rid of | |
46 | * token and get something simpler ? because we need to know if the | |
47 | * info is a TCommentCpp or TCommentSpace, etc for some of the further | |
48 | * analysis so easier to keep with the token. | |
ae4735db | 49 | * |
34e49164 C |
50 | * This type contains the whole information. Have all the tokens with this |
51 | * type. | |
52 | *) | |
708f4980 | 53 | type min = |
5626f154 C |
54 | Min of (int list (* match numbers from witness trees *) * |
55 | int (* adjacency information *)) | |
708f4980 C |
56 | | Ctx |
57 | ||
ae4735db C |
58 | type token2 = |
59 | | T2 of Parser_c.token * min * | |
34e49164 C |
60 | int option (* orig index, abstracting away comments and space *) |
61 | | Fake2 | |
708f4980 | 62 | | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) |
5636bb2c | 63 | * Unparse_cocci.nlhint option |
34e49164 | 64 | | C2 of string |
c491d8ee | 65 | | Comma of string |
34e49164 | 66 | | Indent_cocci2 |
c3e37e97 | 67 | | Unindent_cocci2 of bool (* true for permanent, false for temporary *) |
34e49164 C |
68 | |
69 | (* not used yet *) | |
ae4735db | 70 | type token3 = |
34e49164 C |
71 | | T3 of Parser_c.token |
72 | | Cocci3 of string | |
73 | | C3 of string | |
74 | ||
75 | ||
76 | (* similar to the tech in parsing_hack *) | |
77 | type token_extended = { | |
78 | tok2 : token2; | |
79 | str : string; | |
80 | idx: int option; (* to know if 2 tokens were consecutive in orig file *) | |
81 | mutable new_tokens_before : token2 list; | |
82 | mutable remove : bool; | |
83 | } | |
84 | ||
85 | ||
86 | (*****************************************************************************) | |
87 | (* Helpers *) | |
88 | (*****************************************************************************) | |
89 | ||
ae4735db | 90 | let info_of_token1 t = |
34e49164 C |
91 | match t with |
92 | | Fake1 info -> info | |
93 | | T1 tok -> TH.info_of_tok tok | |
94 | ||
002099fc C |
95 | let print_token1 = function |
96 | T1 tok -> TH.str_of_tok tok | |
97 | | Fake1 info -> "fake" | |
98 | ||
34e49164 C |
99 | let str_of_token2 = function |
100 | | T2 (t,_,_) -> TH.str_of_tok t | |
101 | | Fake2 -> "" | |
5636bb2c | 102 | | Cocci2 (s,_,_,_,_) -> s |
34e49164 | 103 | | C2 s -> s |
c491d8ee | 104 | | Comma s -> s |
34e49164 | 105 | | Indent_cocci2 -> "" |
c3e37e97 | 106 | | Unindent_cocci2 _ -> "" |
34e49164 C |
107 | |
108 | let print_token2 = function | |
708f4980 C |
109 | | T2 (t,b,_) -> |
110 | let b_str = | |
111 | match b with | |
112 | Min (index,adj) -> | |
113 | Printf.sprintf "-%d[%s]" adj | |
114 | (String.concat " " (List.map string_of_int index)) | |
115 | | Ctx -> "" in | |
116 | "T2:"^b_str^TH.str_of_tok t | |
fc1ad971 | 117 | | Fake2 -> "fake" |
5636bb2c | 118 | | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s |
34e49164 | 119 | | C2 s -> "C2:"^s |
c491d8ee | 120 | | Comma s -> "Comma:"^s |
34e49164 | 121 | | Indent_cocci2 -> "Indent" |
c3e37e97 | 122 | | Unindent_cocci2 _ -> "Unindent" |
34e49164 | 123 | |
002099fc | 124 | let simple_print_all_tokens1 l = |
90aeb998 | 125 | List.iter (function x -> Printf.printf "|%s| " (print_token1 x)) l; |
002099fc C |
126 | Printf.printf "\n" |
127 | ||
708f4980 | 128 | let simple_print_all_tokens2 l = |
fc1ad971 | 129 | List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l; |
34e49164 C |
130 | Printf.printf "\n" |
131 | ||
132 | let str_of_token3 = function | |
133 | | T3 t -> TH.str_of_tok t | |
134 | | Cocci3 s | C3 s -> s | |
135 | ||
136 | ||
137 | ||
ae4735db C |
138 | let mk_token_extended x = |
139 | let origidx = | |
34e49164 | 140 | match x with |
ae4735db | 141 | | T2 (_,_, idx) -> idx |
34e49164 C |
142 | | _ -> None |
143 | in | |
ae4735db | 144 | { tok2 = x; |
34e49164 C |
145 | str = str_of_token2 x; |
146 | idx = origidx; | |
147 | new_tokens_before = []; | |
148 | remove = false; | |
149 | } | |
150 | ||
ae4735db | 151 | let rebuild_tokens_extented toks_ext = |
34e49164 | 152 | let _tokens = ref [] in |
ae4735db | 153 | toks_ext +> List.iter (fun tok -> |
34e49164 C |
154 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); |
155 | if not tok.remove then push2 tok.tok2 _tokens; | |
156 | ); | |
157 | let tokens = List.rev !_tokens in | |
158 | (tokens +> List.map mk_token_extended) | |
159 | ||
160 | ||
161 | let mcode_contain_plus = function | |
162 | | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false | |
163 | | Ast_cocci.CONTEXT _ -> true | |
485bce71 | 164 | (* patch: when need full coccinelle transformation *) |
708f4980 C |
165 | | Ast_cocci.MINUS (_,_,_,[]) -> false |
166 | | Ast_cocci.MINUS (_,_,_,x::xs) -> true | |
951c7801 | 167 | | Ast_cocci.PLUS _ -> raise Impossible |
34e49164 | 168 | |
ae4735db | 169 | let contain_plus info = |
34e49164 C |
170 | let mck = Ast_c.mcode_of_info info in |
171 | mcode_contain_plus mck | |
172 | ||
173 | (*****************************************************************************) | |
174 | (* Last fix on the ast *) | |
175 | (*****************************************************************************) | |
176 | ||
ae4735db | 177 | (* Because of the ugly trick to handle initialiser, I generate fake ',' |
34e49164 C |
178 | * for the last initializer element, but if there is nothing around it, |
179 | * I don't want in the end to print it. | |
180 | *) | |
181 | ||
ae4735db | 182 | let remove_useless_fakeInfo_struct program = |
34e49164 | 183 | let bigf = { Visitor_c.default_visitor_c_s with |
ae4735db | 184 | Visitor_c.kini_s = (fun (k,bigf) ini -> |
34e49164 | 185 | match k ini with |
113803cf | 186 | | InitList args, ii -> |
34e49164 C |
187 | (match ii with |
188 | | [_i1;_i2] -> ini | |
113803cf | 189 | | [i1;i2;iicommaopt] -> |
34e49164 C |
190 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) |
191 | && (Ast_c.is_fake iicommaopt) | |
192 | (* sometimes the guy put a normal iicommaopt *) | |
34e49164 C |
193 | then InitList args, [i1;i2] |
194 | else InitList args, [i1;i2;iicommaopt] | |
ae4735db | 195 | | [i1;i2;iicommaopt;end_comma_opt] -> |
113803cf C |
196 | (* only in #define. end_comma_opt canot be fake *) |
197 | (* not sure if this will be considered ambiguous with a previous | |
198 | case? *) | |
199 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) | |
200 | && (Ast_c.is_fake iicommaopt) | |
201 | (* sometimes the guy put a normal iicommaopt *) | |
202 | then InitList args, [i1;i2;end_comma_opt] | |
203 | else InitList args, [i1;i2;iicommaopt;end_comma_opt] | |
34e49164 C |
204 | | _ -> raise Impossible |
205 | ) | |
206 | | x -> x | |
207 | ) | |
208 | } in | |
209 | Visitor_c.vk_toplevel_s bigf program | |
210 | ||
211 | ||
212 | (*****************************************************************************) | |
213 | (* Tokens1 generation *) | |
214 | (*****************************************************************************) | |
215 | ||
ae4735db C |
216 | let get_fakeInfo_and_tokens celem toks = |
217 | let toks_in = ref toks in | |
34e49164 C |
218 | let toks_out = ref [] in |
219 | ||
220 | (* todo? verify good order of position ? *) | |
ae4735db | 221 | let pr_elem info = |
34e49164 | 222 | match Ast_c.pinfo_of_info info with |
ae4735db | 223 | | FakeTok _ -> |
34e49164 | 224 | Common.push2 (Fake1 info) toks_out |
ae4735db | 225 | | OriginTok _ | ExpandedTok _ -> |
34e49164 | 226 | (* get the associated comments/space/cppcomment tokens *) |
978fd7e5 | 227 | let (before, x, after) = |
ae4735db | 228 | !toks_in +> Common.split_when (fun tok -> |
34e49164 C |
229 | info =*= TH.info_of_tok tok) |
230 | in | |
b1b2de81 | 231 | assert(info =*= TH.info_of_tok x); |
34e49164 | 232 | (*old: assert(before +> List.for_all (TH.is_comment)); *) |
ae4735db | 233 | before +> List.iter (fun x -> |
34e49164 | 234 | if not (TH.is_comment x) |
0708f913 | 235 | then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) |
34e49164 C |
236 | (* case such as int asm d3("x"); not yet in ast *) |
237 | ); | |
238 | before +> List.iter (fun x -> Common.push2 (T1 x) toks_out); | |
239 | push2 (T1 x) toks_out; | |
240 | toks_in := after; | |
ae4735db | 241 | | AbstractLineTok _ -> |
34e49164 C |
242 | (* can be called on type info when for instance use -type_c *) |
243 | if !Flag_parsing_c.pretty_print_type_info | |
244 | then Common.push2 (Fake1 info) toks_out | |
ae4735db | 245 | else raise Impossible (* at this stage *) |
34e49164 C |
246 | in |
247 | ||
248 | let pr_space _ = () in (* use the spacing that is there already *) | |
249 | ||
250 | Pretty_print_c.pp_program_gen pr_elem pr_space celem; | |
251 | ||
252 | if not (null !toks_in) | |
0708f913 | 253 | then failwith "WEIRD: unparsing not finished"; |
34e49164 C |
254 | |
255 | List.rev !toks_out | |
256 | ||
257 | (* Fake nodes that have BEFORE code should be moved over any subsequent | |
258 | whitespace and newlines, but not any comments, to get as close to the affected | |
002099fc | 259 | code as possible. Similarly, fake nodes that have AFTER code should be moved |
34e49164 C |
260 | backwards. No fake nodes should have both before and after code. *) |
261 | ||
262 | let displace_fake_nodes toks = | |
263 | let is_fake = function Fake1 _ -> true | _ -> false in | |
264 | let is_whitespace = function | |
265 | T1(Parser_c.TCommentSpace _) | |
485bce71 | 266 | (* patch: cocci *) |
34e49164 C |
267 | | T1(Parser_c.TCommentNewline _) -> true |
268 | | _ -> false in | |
269 | let rec loop toks = | |
270 | let fake_info = | |
271 | try Some (Common.split_when is_fake toks) | |
272 | with Not_found -> None in | |
273 | match fake_info with | |
274 | Some(bef,((Fake1 info) as fake),aft) -> | |
275 | (match !(info.cocci_tag) with | |
ae4735db | 276 | | Some x -> |
708f4980 | 277 | (match x with |
002099fc | 278 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) -> |
34e49164 | 279 | (* move the fake node forwards *) |
002099fc C |
280 | let (whitespace,rest) = Common.span is_whitespace aft in |
281 | bef @ whitespace @ fake :: (loop rest) | |
282 | | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) -> | |
34e49164 | 283 | (* move the fake node backwards *) |
002099fc C |
284 | let revbef = List.rev bef in |
285 | let (revwhitespace,revprev) = Common.span is_whitespace revbef in | |
286 | let whitespace = List.rev revwhitespace in | |
287 | let prev = List.rev revprev in | |
288 | prev @ fake :: (loop (whitespace @ aft)) | |
289 | | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) -> | |
290 | bef @ fake :: (loop aft) | |
291 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) -> | |
292 | failwith "fake node should not be before-after" | |
293 | | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) | |
485bce71 | 294 | ) |
ae4735db | 295 | | None -> |
708f4980 C |
296 | bef @ fake :: (loop aft) |
297 | ) | |
34e49164 C |
298 | | None -> toks |
299 | | _ -> raise Impossible in | |
300 | loop toks | |
301 | ||
302 | (*****************************************************************************) | |
303 | (* Tokens2 generation *) | |
304 | (*****************************************************************************) | |
305 | ||
b1b2de81 | 306 | let comment2t2 = function |
5636bb2c C |
307 | (Token_c.TCommentCpp |
308 | (* not sure iif the following list is exhaustive or complete *) | |
309 | (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError), | |
310 | (info : Token_c.info)) -> | |
311 | C2(info.Common.str) | |
312 | | (Token_c.TCommentCpp x,(info : Token_c.info)) -> | |
b1b2de81 C |
313 | C2("\n"^info.Common.str^"\n") |
314 | | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x)) | |
315 | ||
ae4735db | 316 | let expand_mcode toks = |
34e49164 C |
317 | let toks_out = ref [] in |
318 | ||
319 | let index = ref 0 in | |
320 | ||
ae4735db | 321 | let add_elem t minus = |
34e49164 | 322 | match t with |
ae4735db | 323 | | Fake1 info -> |
34e49164 | 324 | let str = Ast_c.str_of_info info in |
b1b2de81 | 325 | if str =$= "" |
34e49164 | 326 | then push2 (Fake2) toks_out |
c491d8ee C |
327 | (* fx the fake "," at the end of a structure or enum. |
328 | no idea what other fake info there can be... *) | |
329 | else push2 (Comma str) toks_out | |
ae4735db C |
330 | |
331 | ||
002099fc | 332 | | T1 tok -> |
b1b2de81 | 333 | (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) |
34e49164 | 334 | (* no tag on expandedTok ! *) |
fc1ad971 C |
335 | let modified = function |
336 | None -> false | |
337 | | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false | |
338 | | _ -> true in | |
339 | (if (TH.is_expanded tok && | |
340 | modified !((TH.info_of_tok tok).cocci_tag) | |
341 | (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)) | |
0708f913 C |
342 | then |
343 | failwith | |
344 | (Printf.sprintf | |
345 | "expanded token %s on line %d is either modified or stored in a metavariable" | |
346 | (TH.str_of_tok tok) (TH.line_of_tok tok))); | |
34e49164 | 347 | |
ae4735db | 348 | let tok' = tok +> TH.visitor_info_of_tok (fun i -> |
34e49164 C |
349 | { i with cocci_tag = ref Ast_c.emptyAnnot; } |
350 | ) in | |
351 | ||
ae4735db | 352 | let optindex = |
34e49164 C |
353 | if TH.is_origin tok && not (TH.is_real_comment tok) |
354 | then begin | |
355 | incr index; | |
356 | Some !index | |
357 | end | |
358 | else None | |
359 | in | |
360 | ||
361 | push2 (T2 (tok', minus, optindex)) toks_out | |
362 | in | |
363 | ||
ae4735db C |
364 | let expand_info t = |
365 | let (mcode,env) = | |
708f4980 | 366 | Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in |
34e49164 | 367 | |
5636bb2c C |
368 | let pr_cocci s ln col rcol hint = |
369 | push2 (Cocci2(s,ln,col,rcol,hint)) toks_out in | |
ae4735db | 370 | let pr_c info = |
b1b2de81 C |
371 | (match Ast_c.pinfo_of_info info with |
372 | Ast_c.AbstractLineTok _ -> | |
373 | push2 (C2 (Ast_c.str_of_info info)) toks_out | |
374 | | Ast_c.FakeTok (s,_) -> | |
375 | push2 (C2 s) toks_out | |
34e49164 C |
376 | | _ -> |
377 | Printf.printf "line: %s\n" (Common.dump info); | |
b1b2de81 C |
378 | failwith "not an abstract line"); |
379 | (!(info.Ast_c.comments_tag)).Ast_c.mafter +> | |
380 | List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in | |
381 | ||
708f4980 | 382 | let pr_barrier ln col = (* marks a position, used around C code *) |
5636bb2c | 383 | push2 (Cocci2("",ln,col,col,None)) toks_out in |
708f4980 | 384 | let pr_nobarrier ln col = () in (* not needed for linux spacing *) |
b1b2de81 | 385 | |
708f4980 | 386 | let pr_cspace _ = push2 (C2 " ") toks_out in |
34e49164 | 387 | |
708f4980 C |
388 | let pr_space _ = () (* rely on add_space in cocci code *) in |
389 | let pr_arity _ = () (* not interested *) in | |
34e49164 C |
390 | |
391 | let indent _ = push2 Indent_cocci2 toks_out in | |
c3e37e97 | 392 | let unindent x = push2 (Unindent_cocci2 x) toks_out in |
34e49164 | 393 | |
708f4980 C |
394 | let args_pp = |
395 | (env, pr_cocci, pr_c, pr_cspace, | |
396 | (match !Flag_parsing_c.spacing with | |
397 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace), | |
398 | pr_arity, | |
399 | (match !Flag_parsing_c.spacing with | |
400 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), | |
401 | indent, unindent) in | |
34e49164 | 402 | |
ae4735db C |
403 | (* old: when for yacfe with partial cocci: |
404 | * add_elem t false; | |
485bce71 | 405 | *) |
34e49164 | 406 | |
485bce71 | 407 | (* patch: when need full coccinelle transformation *) |
faf9a90c | 408 | let unparser = Unparse_cocci.pp_list_list_any args_pp false in |
34e49164 | 409 | match mcode with |
ae4735db | 410 | | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> |
34e49164 | 411 | (* Why adding ? because I want to have all the information, the whole |
ae4735db | 412 | * set of tokens, so I can then process and remove the |
34e49164 | 413 | * is_between_two_minus for instance *) |
708f4980 | 414 | add_elem t (Min (inst,adj)); |
faf9a90c | 415 | unparser any_xxs Unparse_cocci.InPlace |
ae4735db | 416 | | Ast_cocci.CONTEXT (_,any_befaft) -> |
34e49164 | 417 | (match any_befaft with |
ae4735db | 418 | | Ast_cocci.NOTHING -> |
708f4980 | 419 | add_elem t Ctx |
951c7801 | 420 | | Ast_cocci.BEFORE (xxs,_) -> |
faf9a90c | 421 | unparser xxs Unparse_cocci.Before; |
708f4980 | 422 | add_elem t Ctx |
ae4735db | 423 | | Ast_cocci.AFTER (xxs,_) -> |
708f4980 | 424 | add_elem t Ctx; |
faf9a90c | 425 | unparser xxs Unparse_cocci.After; |
ae4735db | 426 | | Ast_cocci.BEFOREAFTER (xxs, yys, _) -> |
faf9a90c | 427 | unparser xxs Unparse_cocci.Before; |
708f4980 | 428 | add_elem t Ctx; |
faf9a90c | 429 | unparser yys Unparse_cocci.After; |
34e49164 | 430 | ) |
951c7801 | 431 | | Ast_cocci.PLUS _ -> raise Impossible |
34e49164 C |
432 | |
433 | in | |
434 | ||
435 | toks +> List.iter expand_info; | |
436 | List.rev !toks_out | |
ae4735db | 437 | |
34e49164 C |
438 | |
439 | (*****************************************************************************) | |
440 | (* Tokens2 processing, filtering, adjusting *) | |
441 | (*****************************************************************************) | |
442 | ||
002099fc | 443 | let is_space = function |
fc1ad971 | 444 | | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *) |
ae4735db | 445 | | _ -> false |
fc1ad971 C |
446 | |
447 | let is_newline = function | |
448 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
449 | | _ -> false | |
450 | ||
451 | let is_whitespace = function | |
ae4735db | 452 | | (T2 (t,_b,_i)) -> |
002099fc C |
453 | (match t with |
454 | | Parser_c.TCommentSpace _ -> true (* only whitespace *) | |
fc1ad971 | 455 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
002099fc C |
456 | | _ -> false |
457 | ) | |
ae4735db | 458 | | _ -> false |
002099fc | 459 | |
34e49164 | 460 | let is_minusable_comment = function |
ae4735db | 461 | | (T2 (t,_b,_i)) -> |
34e49164 C |
462 | (match t with |
463 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
ae4735db | 464 | (* patch: coccinelle *) |
002099fc | 465 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
7f004419 | 466 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false |
002099fc C |
467 | | Parser_c.TComment _ |
468 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
b1b2de81 C |
469 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
470 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *) | |
34e49164 C |
471 | -> true |
472 | ||
b1b2de81 | 473 | | Parser_c.TCommentMisc _ |
0708f913 | 474 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) |
34e49164 C |
475 | -> false |
476 | ||
477 | | _ -> false | |
478 | ) | |
ae4735db | 479 | | _ -> false |
34e49164 | 480 | |
fc1ad971 | 481 | let is_minusable_comment_nocpp = function |
ae4735db | 482 | | (T2 (t,_b,_i)) -> |
fc1ad971 C |
483 | (match t with |
484 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
ae4735db | 485 | (* patch: coccinelle *) |
fc1ad971 | 486 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
7f004419 | 487 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false |
fc1ad971 C |
488 | | Parser_c.TComment _ -> true |
489 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
490 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) | |
491 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
492 | -> false | |
493 | ||
494 | | Parser_c.TCommentMisc _ | |
495 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) | |
496 | -> false | |
497 | ||
498 | | _ -> false | |
499 | ) | |
ae4735db | 500 | | _ -> false |
fc1ad971 | 501 | |
34e49164 | 502 | let all_coccis = function |
c491d8ee | 503 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ -> true |
34e49164 C |
504 | | _ -> false |
505 | ||
b1b2de81 C |
506 | (*previously gave up if the first character was a newline, but not clear why*) |
507 | let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x | |
34e49164 | 508 | |
708f4980 | 509 | let set_minus_comment adj = function |
ae4735db | 510 | | T2 (t,Ctx,idx) -> |
34e49164 C |
511 | let str = TH.str_of_tok t in |
512 | (match t with | |
513 | | Parser_c.TCommentSpace _ | |
ae4735db | 514 | (* patch: coccinelle *) |
34e49164 C |
515 | | Parser_c.TCommentNewline _ -> () |
516 | ||
ae4735db C |
517 | | Parser_c.TComment _ |
518 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
b1b2de81 C |
519 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
520 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
ae4735db | 521 | -> |
b1b2de81 C |
522 | pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" |
523 | (TH.line_of_tok t) str) | |
34e49164 C |
524 | | _ -> raise Impossible |
525 | ); | |
708f4980 | 526 | T2 (t, Min adj, idx) |
ae4735db | 527 | (* patch: coccinelle *) |
fc1ad971 | 528 | | T2 (t,Min adj,idx) as x -> x |
34e49164 C |
529 | | _ -> raise Impossible |
530 | ||
708f4980 | 531 | let set_minus_comment_or_plus adj = function |
c491d8ee | 532 | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2 | Unindent_cocci2 _ as x -> x |
708f4980 | 533 | | x -> set_minus_comment adj x |
34e49164 | 534 | |
fc1ad971 C |
535 | let drop_minus xs = |
536 | xs +> Common.exclude (function | |
537 | | T2 (t,Min adj,_) -> true | |
538 | | _ -> false | |
539 | ) | |
540 | ||
5626f154 C |
541 | let drop_expanded_and_fake xs = |
542 | xs +> Common.exclude (function | |
34e49164 C |
543 | | T2 (t,_,_) when TH.is_expanded t -> true |
544 | | Fake2 -> true | |
34e49164 C |
545 | | _ -> false |
546 | ) | |
5626f154 C |
547 | |
548 | let remove_minus_and_between_and_expanded_and_fake xs = | |
549 | ||
550 | (* get rid of expanded and fake tok *) | |
551 | let xs = drop_expanded_and_fake xs in | |
34e49164 | 552 | |
113803cf | 553 | let minus_or_comment = function |
708f4980 | 554 | T2(_,Min adj,_) -> true |
113803cf C |
555 | | x -> is_minusable_comment x in |
556 | ||
fc1ad971 C |
557 | let minus_or_comment_nocpp = function |
558 | T2(_,Min adj,_) -> true | |
559 | | x -> is_minusable_comment_nocpp x in | |
560 | ||
561 | let common_adj (index1,adj1) (index2,adj2) = | |
562 | adj1 = adj2 (* same adjacency info *) && | |
563 | (* non-empty intersection of witness trees *) | |
564 | not ((Common.inter_set index1 index2) = []) in | |
565 | ||
fc1ad971 C |
566 | (* new idea: collects regions not containing non-space context code |
567 | if two adjacent adjacent minus tokens satisfy common_adj then delete | |
568 | all spaces, comments etc between them | |
569 | if two adjacent minus tokens do not satisfy common_adj only delete | |
570 | the spaces between them if there are no comments, etc. | |
571 | if the region contain no plus code and is both preceded and followed | |
572 | by a newline, delete the initial newline. *) | |
573 | ||
574 | let rec adjust_around_minus = function | |
34e49164 | 575 | [] -> [] |
708f4980 | 576 | | (T2(Parser_c.TCommentNewline c,_b,_i) as x):: |
fc1ad971 C |
577 | (T2(_,Min adj1,_) as t1)::xs -> |
578 | let (minus_list,rest) = Common.span not_context (t1::xs) in | |
579 | let contains_plus = List.exists is_plus minus_list in | |
580 | let x = | |
581 | match List.rev minus_list with | |
582 | (T2(Parser_c.TCommentNewline c,_b,_i))::rest | |
583 | when List.for_all minus_or_comment minus_list -> | |
584 | set_minus_comment_or_plus adj1 x | |
585 | | _ -> x in | |
586 | x :: adjust_within_minus contains_plus minus_list @ | |
587 | adjust_around_minus rest | |
588 | | (T2(_,Min adj1,_) as t1)::xs -> | |
589 | let (minus_list,rest) = Common.span not_context (t1::xs) in | |
590 | let contains_plus = List.exists is_plus minus_list in | |
591 | adjust_within_minus contains_plus minus_list @ adjust_around_minus rest | |
c491d8ee C |
592 | | x::xs -> |
593 | x :: adjust_around_minus xs | |
fc1ad971 C |
594 | and adjust_within_minus cp (* contains plus *) = function |
595 | (T2(_,Min adj1,_) as t1)::xs -> | |
596 | let not_minus = function T2(_,Min _,_) -> false | _ -> true in | |
597 | let (not_minus_list,rest) = Common.span not_minus xs in | |
598 | t1 :: | |
34e49164 | 599 | (match rest with |
fc1ad971 C |
600 | (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 -> |
601 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) | |
602 | @ (adjust_within_minus cp (t2::xs)) | |
603 | | (T2(_,Min adj2,_) as t2)::xs -> | |
5626f154 | 604 | if not cp && List.for_all is_whitespace not_minus_list |
fc1ad971 C |
605 | then |
606 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) | |
607 | @ (adjust_within_minus cp (t2::xs)) | |
5626f154 C |
608 | else |
609 | not_minus_list @ (adjust_within_minus cp (t2::xs)) | |
fc1ad971 C |
610 | | _ -> |
611 | if cp | |
612 | then xs | |
613 | else | |
614 | let (spaces,rest) = Common.span is_space xs in | |
615 | (List.map (set_minus_comment_or_plus adj1) spaces) | |
616 | @ rest) | |
617 | | xs -> failwith "should always start with minus" | |
618 | and not_context = function | |
619 | (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false | |
620 | | _ -> true | |
621 | and is_plus = function | |
c491d8ee | 622 | C2 _ | Comma _ | Cocci2 _ -> true |
fc1ad971 | 623 | | _ -> false in |
34e49164 | 624 | |
fc1ad971 | 625 | let xs = adjust_around_minus xs in |
34e49164 | 626 | |
113803cf | 627 | (* this drops blank lines after a brace introduced by removing code *) |
fc1ad971 C |
628 | let minus_or_comment_nonl = function |
629 | T2(_,Min adj,_) -> true | |
630 | | T2(Parser_c.TCommentNewline _,_b,_i) -> false | |
631 | | x -> is_minusable_comment x in | |
632 | ||
113803cf C |
633 | let rec adjust_after_brace = function |
634 | [] -> [] | |
708f4980 | 635 | | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs) |
b1b2de81 | 636 | when str_of_token2 x =$= "{" -> |
fc1ad971 | 637 | let (between_minus,rest) = Common.span minus_or_comment_nonl xs in |
113803cf C |
638 | let is_whitespace = function |
639 | T2(Parser_c.TCommentSpace _,_b,_i) | |
640 | (* patch: cocci *) | |
641 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
642 | | _ -> false in | |
643 | let (newlines,rest) = Common.span is_whitespace rest in | |
644 | let (drop_newlines,last_newline) = | |
645 | let rec loop = function | |
646 | [] -> ([],[]) | |
647 | | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest -> | |
648 | (List.rev rest,[x]) | |
649 | | x::xs -> | |
650 | let (drop_newlines,last_newline) = loop xs in | |
651 | (drop_newlines,x::last_newline) in | |
652 | loop (List.rev newlines) in | |
708f4980 | 653 | x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@ |
113803cf C |
654 | last_newline@ |
655 | adjust_after_brace rest | |
656 | | x::xs -> x::adjust_after_brace xs in | |
657 | ||
658 | let xs = adjust_after_brace xs in | |
659 | ||
fc1ad971 C |
660 | (* search backwards from context } over spaces until reaching a newline. |
661 | then go back over all minus code until reaching some context or + code. | |
662 | get rid of all intervening spaces, newlines, and comments | |
663 | input is reversed *) | |
664 | let rec adjust_before_brace = function | |
665 | [] -> [] | |
666 | | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x -> | |
667 | let (outer_spaces,rest) = Common.span is_space xs in | |
668 | x :: outer_spaces @ | |
669 | (match rest with | |
670 | ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) :: | |
671 | (* the rest of this code is the same as from_newline below | |
672 | but merging them seems to be error prone... *) | |
673 | ((T2 (t, Min adj, idx)) as m) :: rest -> | |
674 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
675 | h :: m :: | |
676 | (List.map (set_minus_comment adj) spaces) @ | |
677 | (adjust_before_brace rest) | |
678 | | _ -> adjust_before_brace rest) | |
679 | | x::xs -> x :: (adjust_before_brace xs) in | |
680 | ||
681 | let from_newline = function | |
682 | ((T2 (t, Min adj, idx)) as m) :: rest -> | |
683 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
684 | m :: | |
685 | (List.map (set_minus_comment adj) spaces) @ | |
686 | (adjust_before_brace rest) | |
c491d8ee C |
687 | | ((T2 (t0, Ctx, idx0)) as m0) :: ((T2 (t, Min adj, idx)) as m) :: rest |
688 | when TH.str_of_tok t0 = "" -> | |
689 | (* This is for the case of a #define that is completely deleted, | |
690 | because a #define has a strange EOL token at the end. | |
691 | We hope there i no other kind of token that is represented by | |
692 | "", but it seems like changing the kind of token might break | |
693 | the end of entity recognition in the C parser. | |
694 | See parsing_hacks.ml *) | |
695 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
696 | m0 :: m :: | |
697 | (List.map (set_minus_comment adj) spaces) @ | |
698 | (adjust_before_brace rest) | |
fc1ad971 C |
699 | | rest -> adjust_before_brace rest in |
700 | ||
701 | let xs = List.rev (from_newline (List.rev xs)) in | |
702 | let xs = drop_minus xs in | |
34e49164 C |
703 | xs |
704 | ||
faf9a90c C |
705 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
706 | let adjust_before_semicolon toks = | |
707 | let toks = List.rev toks in | |
002099fc | 708 | let rec search_semic = function |
faf9a90c | 709 | [] -> [] |
002099fc | 710 | | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs -> |
faf9a90c | 711 | if List.mem (str_of_token2 x) [";";")";","] |
002099fc C |
712 | then x :: search_minus false xs |
713 | else x :: search_semic xs | |
714 | | x::xs -> x :: search_semic xs | |
715 | and search_minus seen_minus xs = | |
716 | let (spaces, rest) = Common.span is_space xs in | |
717 | (* only delete spaces if something is actually deleted *) | |
718 | match rest with | |
719 | ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest | |
720 | | _ -> if seen_minus then rest else xs in | |
721 | List.rev (search_semic toks) | |
34e49164 C |
722 | |
723 | let is_ident_like s = s ==~ Common.regexp_alpha | |
724 | ||
aba5c457 C |
725 | let rec drop_space_at_endline = function |
726 | [] -> [] | |
727 | | [x] -> [x] | |
728 | | ((T2(Parser_c.TCommentSpace _,Ctx,_i)) as a)::rest -> | |
729 | let (outer_spaces,rest) = Common.span is_space rest in | |
730 | let minus_or_comment_or_space_nocpp = function | |
731 | T2(_,Min adj,_) -> true | |
732 | | (T2(Parser_c.TCommentSpace _,Ctx,_i)) -> true | |
18b1275a C |
733 | | (T2(Parser_c.TCommentNewline _,Ctx,_i)) -> false |
734 | | x -> false in | |
aba5c457 | 735 | let (minus,rest) = Common.span minus_or_comment_or_space_nocpp rest in |
18b1275a C |
736 | let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in |
737 | if List.exists (function T2(_,Min adj,_) -> true | _ -> false) minus | |
738 | then | |
739 | match rest with | |
740 | ((T2(Parser_c.TCommentNewline _,Ctx,_i)) as a)::rest -> | |
741 | (* drop trailing spaces *) | |
742 | minus@a::(drop_space_at_endline rest) | |
743 | | _ -> fail() | |
744 | else fail() | |
aba5c457 C |
745 | | a :: rest -> a :: drop_space_at_endline rest |
746 | ||
747 | (* if a removed ( is between two tokens, then add a space *) | |
748 | let rec paren_to_space = function | |
749 | [] -> [] | |
750 | | [x] -> [x] | |
751 | | [x;y] -> [x;y] | |
752 | | ((T2(_,Ctx,_)) as a)::((T2(t,Min _,_)) as b)::((T2(_,Ctx,_)) as c)::rest | |
753 | when not (is_whitespace a) && TH.str_of_tok t = "(" -> | |
aba5c457 C |
754 | a :: b :: (C2 " ") :: (paren_to_space (c :: rest)) |
755 | | a :: rest -> a :: (paren_to_space rest) | |
756 | ||
ae4735db | 757 | let rec add_space xs = |
34e49164 C |
758 | match xs with |
759 | | [] -> [] | |
760 | | [x] -> [x] | |
5636bb2c | 761 | | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs |
708f4980 C |
762 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && |
763 | not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly -> | |
764 | (* this only works within a line. could consider whether | |
765 | something should be done to add newlines too, rather than | |
766 | printing them explicitly in unparse_cocci. *) | |
767 | x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) | |
1eddfd50 C |
768 | | ((T2(_,Ctx,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *) |
769 | let sx = str_of_token2 x in | |
770 | let sy = str_of_token2 y in | |
771 | if is_ident_like sx && (is_ident_like sy or List.mem sy ["="]) | |
772 | then x::C2 " "::(add_space (y::xs)) | |
773 | else x::(add_space (y::xs)) | |
774 | | x::y::xs -> (* not boundary, not sure if it is possible *) | |
34e49164 C |
775 | let sx = str_of_token2 x in |
776 | let sy = str_of_token2 y in | |
faf9a90c | 777 | if is_ident_like sx && is_ident_like sy |
34e49164 C |
778 | then x::C2 " "::(add_space (y::xs)) |
779 | else x::(add_space (y::xs)) | |
780 | ||
c491d8ee C |
781 | (* A fake comma is added at the end of an unordered initlist or a enum |
782 | decl, if the initlist or enum doesn't already end in a comma. This is only | |
783 | needed if there is + code, ie if we see Cocci after it in the code sequence *) | |
784 | ||
785 | let rec drop_end_comma = function | |
786 | [] -> [] | |
787 | | [x] -> [x] | |
788 | | ((Comma ",") as x) :: rest -> | |
789 | let (newlines,rest2) = Common.span is_whitespace rest in | |
790 | (match rest2 with | |
791 | (Cocci2 _) :: _ -> x :: drop_end_comma rest | |
792 | | _ -> drop_end_comma rest) | |
793 | | x :: xs -> x :: drop_end_comma xs | |
794 | ||
5636bb2c C |
795 | (* The following only works for the outermost function call. Stack records |
796 | the column of all open parentheses. Space_cell contains the most recent | |
797 | comma in the outermost function call. The goal is to decide whether this | |
798 | should be followed by a space or a newline and indent. *) | |
799 | let add_newlines toks tabbing_unit = | |
800 | let create_indent n = | |
801 | let (tu,tlen) = | |
802 | match tabbing_unit with | |
803 | Some ("\t",_) -> ("\t",8) | |
804 | | Some ("",_) -> ("\t",8) (* not sure why... *) | |
805 | | Some (s,_) -> (s,String.length s) (* assuming only spaces *) | |
806 | | None -> ("\t",8) in | |
807 | let rec loop seen = | |
808 | if seen + tlen <= n | |
809 | then tu ^ loop (seen + tlen) | |
810 | else String.make (n-seen) ' ' in | |
811 | loop 0 in | |
812 | let check_for_newline count x = function | |
813 | Some (start,space_cell) when count > Flag_parsing_c.max_width -> | |
814 | space_cell := "\n"^(create_indent x); | |
815 | Some (x + (count - start)) | |
816 | | _ -> None in | |
817 | (* the following is for strings that may contain newline *) | |
818 | let string_length s count = | |
819 | let l = list_of_string s in | |
820 | List.fold_left | |
821 | (function count -> | |
822 | function | |
823 | '\t' -> count + 8 | |
824 | | '\n' -> 0 | |
825 | | c -> count + 1) | |
826 | count l in | |
827 | let rec loop info count = function | |
828 | [] -> [] | |
829 | | ((T2(tok,_,_)) as a)::xs -> | |
830 | a :: loop info (string_length (TH.str_of_tok tok) count) xs | |
831 | | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs -> | |
832 | let (stack,space_cell) = info in | |
833 | let rest = | |
834 | match hint with | |
835 | None -> loop info (count + (String.length s)) xs | |
836 | | Some Unparse_cocci.StartBox -> | |
837 | let count = count + (String.length s) in | |
838 | loop (count::stack,space_cell) count xs | |
839 | | Some Unparse_cocci.EndBox -> | |
840 | let count = count + (String.length s) in | |
841 | (match stack with | |
842 | [x] -> | |
843 | (match check_for_newline count x space_cell with | |
844 | Some count -> loop ([],None) count xs | |
845 | | None -> loop ([],None) count xs) | |
846 | | _ -> loop (List.tl stack,space_cell) count xs) | |
847 | | Some (Unparse_cocci.SpaceOrNewline sp) -> | |
848 | let count = count + (String.length s) + 1 (*space*) in | |
849 | (match stack with | |
850 | [x] -> | |
851 | (match check_for_newline count x space_cell with | |
852 | Some count -> loop (stack,Some (x,sp)) count xs | |
853 | | None -> loop (stack,Some (count,sp)) count xs) | |
854 | | _ -> loop info count xs) in | |
855 | a :: rest | |
856 | | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs | |
c491d8ee | 857 | | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs |
5636bb2c C |
858 | | Fake2 :: _ | Indent_cocci2 :: _ |
859 | | Unindent_cocci2 _::_ -> | |
860 | failwith "unexpected fake, indent, or unindent" in | |
861 | let redo_spaces prev = function | |
862 | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) -> | |
863 | C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev | |
864 | | t -> t::prev in | |
865 | (match !Flag_parsing_c.spacing with | |
866 | Flag_parsing_c.SMPL -> toks | |
867 | | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))) | |
34e49164 C |
868 | |
869 | (* When insert some new code, because of a + in a SP, we must add this | |
870 | * code at the right place, with the good indentation. So each time we | |
871 | * encounter some spacing info, with some newline, we maintain the | |
872 | * current indentation level used. | |
ae4735db | 873 | * |
34e49164 | 874 | * TODO problems: not accurate. ex: TODO |
ae4735db | 875 | * |
34e49164 C |
876 | * TODO: if in #define region, should add a \ \n |
877 | *) | |
ae4735db | 878 | let new_tabbing2 space = |
34e49164 C |
879 | (list_of_string space) |
880 | +> List.rev | |
b1b2de81 | 881 | +> Common.take_until (fun c -> c =<= '\n') |
34e49164 C |
882 | +> List.rev |
883 | +> List.map string_of_char | |
884 | +> String.concat "" | |
885 | ||
ae4735db | 886 | let new_tabbing a = |
34e49164 C |
887 | Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) |
888 | ||
889 | ||
ae4735db | 890 | let rec adjust_indentation xs = |
aa721442 | 891 | |
34e49164 C |
892 | let _current_tabbing = ref "" in |
893 | let tabbing_unit = ref None in | |
894 | ||
895 | let string_of_list l = String.concat "" (List.map string_of_char l) in | |
896 | ||
897 | (* try to pick a tabbing unit for the plus code *) | |
898 | let adjust_tabbing_unit old_tab new_tab = | |
b1b2de81 | 899 | if !tabbing_unit =*= None && String.length new_tab > String.length old_tab |
34e49164 C |
900 | then |
901 | let old_tab = list_of_string old_tab in | |
902 | let new_tab = list_of_string new_tab in | |
903 | let rec loop = function | |
904 | ([],new_tab) -> | |
905 | tabbing_unit := Some(string_of_list new_tab,List.rev new_tab) | |
906 | | (_,[]) -> failwith "not possible" | |
907 | | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *) | |
908 | loop (old_tab,new_tab) in | |
909 | ||
910 | let remtab tu current_tab = | |
911 | let current_tab = List.rev(list_of_string current_tab) in | |
912 | let rec loop = function | |
913 | ([],new_tab) -> string_of_list (List.rev new_tab) | |
0708f913 | 914 | | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*) |
b1b2de81 | 915 | | (t::ts,n::ns) when t =<= n -> loop (ts,ns) |
34e49164 C |
916 | | (_,ns) -> (* mismatch; remove what we can *) |
917 | string_of_list (List.rev ns) in | |
918 | loop (tu,current_tab) in | |
919 | ||
920 | let rec find_first_tab started = function | |
921 | [] -> () | |
b1b2de81 | 922 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> |
34e49164 | 923 | find_first_tab true xs |
485bce71 | 924 | (* patch: coccinelle *) |
34e49164 C |
925 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_ |
926 | when started -> | |
927 | let s = str_of_token2 x +> new_tabbing in | |
928 | tabbing_unit := Some (s,List.rev (list_of_string s)) | |
929 | | x::xs -> find_first_tab started xs in | |
930 | find_first_tab false xs; | |
931 | ||
1eddfd50 C |
932 | let rec balanced ct = function |
933 | [] -> ct >= 0 | |
934 | | ((T2(tok,_,_)) as x)::xs -> | |
935 | (match str_of_token2 x with | |
936 | "(" -> balanced (ct+1) xs | |
937 | | ")" -> balanced (ct-1) xs | |
938 | | _ -> balanced ct xs) | |
939 | | x::xs -> balanced ct xs in | |
940 | ||
ae4735db | 941 | let rec aux started xs = |
34e49164 C |
942 | match xs with |
943 | | [] -> [] | |
485bce71 | 944 | (* patch: coccinelle *) |
34e49164 | 945 | | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _)):: |
5636bb2c | 946 | ((Cocci2 ("{",_,_,_,_)) as a)::xs |
708f4980 | 947 | when started && str_of_token2 x =$= ")" -> |
34e49164 | 948 | (* to be done for if, etc, but not for a function header *) |
708f4980 | 949 | x::(C2 " ")::a::(aux started xs) |
1eddfd50 C |
950 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs |
951 | when balanced 0 (fst(Common.span (function x -> not(is_newline x)) xs)) -> | |
ae4735db | 952 | let old_tabbing = !_current_tabbing in |
34e49164 C |
953 | str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s); |
954 | (* only trust the indentation after the first { *) | |
c3e37e97 C |
955 | (if started |
956 | then adjust_tabbing_unit old_tabbing !_current_tabbing); | |
34e49164 C |
957 | let coccis_rest = Common.span all_coccis xs in |
958 | (match coccis_rest with | |
b1b2de81 | 959 | (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" -> |
34e49164 C |
960 | (* the case where cocci code has been added before a close } *) |
961 | x::aux started (Indent_cocci2::xs) | |
962 | | _ -> x::aux started xs) | |
963 | | Indent_cocci2::xs -> | |
964 | (match !tabbing_unit with | |
965 | None -> aux started xs | |
966 | | Some (tu,_) -> | |
967 | _current_tabbing := (!_current_tabbing)^tu; | |
5636bb2c | 968 | Cocci2 (tu,-1,-1,-1,None)::aux started xs) |
c3e37e97 | 969 | | Unindent_cocci2(permanent)::xs -> |
34e49164 C |
970 | (match !tabbing_unit with |
971 | None -> aux started xs | |
972 | | Some (_,tu) -> | |
973 | _current_tabbing := remtab tu (!_current_tabbing); | |
974 | aux started xs) | |
975 | (* border between existing code and cocci code *) | |
5636bb2c | 976 | | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs |
b1b2de81 | 977 | when str_of_token2 x =$= "{" -> |
34e49164 C |
978 | x::aux true (y::Indent_cocci2::xs) |
979 | | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs | |
b1b2de81 | 980 | when str_of_token2 y =$= "}" -> |
c3e37e97 | 981 | x::aux started (y::Unindent_cocci2 true::xs) |
34e49164 | 982 | (* starting the body of the function *) |
b1b2de81 | 983 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs |
5636bb2c C |
984 | | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs |
985 | | ((Cocci2("\n",_,_,_,_)) as x)::Unindent_cocci2(false)::xs -> | |
c3e37e97 | 986 | x::aux started xs |
5636bb2c | 987 | | ((Cocci2("\n",_,_,_,_)) as x)::xs -> |
0708f913 | 988 | (* dont inline in expr because of weird eval order of ocaml *) |
ae4735db | 989 | let s = !_current_tabbing in |
5636bb2c | 990 | x::Cocci2 (s,-1,-1,-1,None)::aux started xs |
34e49164 | 991 | | x::xs -> x::aux started xs in |
5636bb2c | 992 | (aux false xs,!tabbing_unit) |
34e49164 C |
993 | |
994 | ||
995 | let rec find_paren_comma = function | |
996 | | [] -> () | |
997 | ||
998 | (* do nothing if was like this in original file *) | |
999 | | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
ae4735db | 1000 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
1001 | find_paren_comma (x2::xs) |
1002 | ||
1003 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
ae4735db | 1004 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
1005 | find_paren_comma (x2::xs) |
1006 | ||
1007 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2) | |
ae4735db | 1008 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
1009 | find_paren_comma (x2::xs) |
1010 | ||
1011 | (* otherwise yes can adjust *) | |
ae4735db | 1012 | | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> |
34e49164 C |
1013 | x2.remove <- true; |
1014 | find_paren_comma (x2::xs) | |
ae4735db | 1015 | | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> |
34e49164 C |
1016 | x1.remove <- true; |
1017 | find_paren_comma (x2::xs) | |
1018 | ||
ae4735db | 1019 | | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> |
34e49164 C |
1020 | x1.remove <- true; |
1021 | find_paren_comma (x2::xs) | |
1022 | ||
ae4735db | 1023 | | x::xs -> |
34e49164 | 1024 | find_paren_comma xs |
34e49164 | 1025 | |
ae4735db C |
1026 | |
1027 | let fix_tokens toks = | |
34e49164 C |
1028 | let toks = toks +> List.map mk_token_extended in |
1029 | ||
1030 | let cleaner = toks +> Common.exclude (function | |
1031 | | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *) | |
1032 | | _ -> false | |
1033 | ) in | |
1034 | find_paren_comma cleaner; | |
1035 | ||
1036 | let toks = rebuild_tokens_extented toks in | |
1037 | toks +> List.map (fun x -> x.tok2) | |
1038 | ||
1039 | ||
1040 | ||
1041 | (*****************************************************************************) | |
1042 | (* Final unparsing (and debugging support) *) | |
1043 | (*****************************************************************************) | |
1044 | ||
1045 | (* for debugging *) | |
1046 | type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin | |
1047 | ||
1048 | let kind_of_token2 = function | |
1049 | | Fake2 -> KFake | |
1050 | | Cocci2 _ -> KCocci | |
1051 | | C2 _ -> KC | |
c491d8ee | 1052 | | Comma _ -> KC |
34e49164 C |
1053 | | T2 (t,_,_) -> |
1054 | (match TH.pinfo_of_tok t with | |
1055 | | ExpandedTok _ -> KExpanded | |
1056 | | OriginTok _ -> KOrigin | |
1057 | | FakeTok _ -> raise Impossible (* now a Fake2 *) | |
1058 | | AbstractLineTok _ -> raise Impossible (* now a KC *) | |
1059 | ) | |
c3e37e97 | 1060 | | Unindent_cocci2 _ | Indent_cocci2 -> raise Impossible |
ae4735db | 1061 | |
34e49164 C |
1062 | let end_mark = "!" |
1063 | ||
1064 | let start_mark = function | |
1065 | | KFake -> "!F!" | |
ae4735db | 1066 | | KCocci -> "!S!" |
34e49164 C |
1067 | | KC -> "!A!" |
1068 | | KExpanded -> "!E!" | |
1069 | | KOrigin -> "" | |
1070 | ||
1071 | let print_all_tokens2 pr xs = | |
1072 | if !Flag_parsing_c.debug_unparsing | |
1073 | then | |
1074 | let current_kind = ref KOrigin in | |
ae4735db | 1075 | xs +> List.iter (fun t -> |
34e49164 | 1076 | let newkind = kind_of_token2 t in |
b1b2de81 | 1077 | if newkind =*= !current_kind |
34e49164 C |
1078 | then pr (str_of_token2 t) |
1079 | else begin | |
1080 | pr (end_mark); | |
1081 | pr (start_mark newkind); | |
1082 | pr (str_of_token2 t); | |
1083 | current_kind := newkind | |
1084 | end | |
1085 | ); | |
ae4735db | 1086 | else |
34e49164 | 1087 | xs +> List.iter (fun x -> pr (str_of_token2 x)) |
ae4735db | 1088 | |
34e49164 C |
1089 | |
1090 | ||
1091 | ||
1092 | (*****************************************************************************) | |
1093 | (* Entry points *) | |
1094 | (*****************************************************************************) | |
1095 | ||
1096 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
1097 | * complete C file, including a modified C file by transformation.ml, | |
1098 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
1099 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
1100 | * fancy stuff when a function was not modified at all. Just need to | |
1101 | * print the list of token as-is. But now pretty_print_c.ml handles | |
1102 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
ae4735db C |
1103 | * optimize a little the pretty printing. |
1104 | * | |
34e49164 C |
1105 | * update: now have PPviastr which goes even faster than PPviatok, so |
1106 | * PPviatok has disappeared. | |
1107 | *) | |
1108 | ||
1109 | type ppmethod = PPnormal | PPviastr | |
1110 | ||
1111 | ||
1112 | ||
1113 | ||
1114 | (* The pp_program function will call pretty_print_c.ml with a special | |
ae4735db | 1115 | * function to print the leaf components, the tokens. When we want to |
34e49164 | 1116 | * print a token, we need to print also maybe the space and comments that |
ae4735db | 1117 | * were close to it in the original file (and that was omitted during the |
34e49164 C |
1118 | * parsing phase), and honor what the cocci-info attached to the token says. |
1119 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
ae4735db | 1120 | * print it and also print some cocci-code attached in a PLUS to it. |
34e49164 C |
1121 | * So we will also maybe call unparse_cocci. Because the cocci-code may |
1122 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
1123 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
1124 | *) | |
1125 | ||
ae4735db C |
1126 | let pp_program2 xs outfile = |
1127 | Common.with_open_outfile outfile (fun (pr,chan) -> | |
1128 | let pr s = | |
1129 | if !Flag_parsing_c.debug_unparsing | |
34e49164 | 1130 | then begin pr2_no_nl s; flush stderr end |
ae4735db | 1131 | else pr s |
34e49164 C |
1132 | (* flush chan; *) |
1133 | (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) | |
1134 | in | |
ae4735db C |
1135 | |
1136 | xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> | |
34e49164 C |
1137 | (* here can still work on ast *) |
1138 | let e = remove_useless_fakeInfo_struct e in | |
ae4735db | 1139 | |
34e49164 | 1140 | match ppmethod with |
002099fc | 1141 | | PPnormal -> |
34e49164 C |
1142 | (* now work on tokens *) |
1143 | ||
1144 | (* phase1: just get all the tokens, all the information *) | |
ae4735db | 1145 | assert(toks_e +> List.for_all (fun t -> |
34e49164 C |
1146 | TH.is_origin t or TH.is_expanded t |
1147 | )); | |
1148 | let toks = get_fakeInfo_and_tokens e toks_e in | |
1149 | let toks = displace_fake_nodes toks in | |
1150 | (* assert Origin;ExpandedTok;Faketok *) | |
1151 | let toks = expand_mcode toks in | |
c491d8ee | 1152 | |
34e49164 C |
1153 | (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok) |
1154 | * and no tag information, just NOTHING. *) | |
fc1ad971 | 1155 | |
c491d8ee | 1156 | |
fc1ad971 C |
1157 | let toks = |
1158 | if !Flag.sgrep_mode2 | |
5626f154 C |
1159 | then |
1160 | (* nothing else to do for sgrep *) | |
1161 | drop_expanded_and_fake (drop_minus toks) | |
fc1ad971 C |
1162 | else |
1163 | (* phase2: can now start to filter and adjust *) | |
5626f154 | 1164 | let (toks,tu) = adjust_indentation toks in |
fc1ad971 | 1165 | let toks = adjust_before_semicolon toks in(*before remove minus*) |
aba5c457 C |
1166 | let toks = drop_space_at_endline toks in |
1167 | let toks = paren_to_space toks in | |
c491d8ee | 1168 | let toks = drop_end_comma toks in |
fc1ad971 C |
1169 | let toks = remove_minus_and_between_and_expanded_and_fake toks in |
1170 | (* assert Origin + Cocci + C and no minus *) | |
1171 | let toks = add_space toks in | |
5636bb2c | 1172 | let toks = add_newlines toks tu in |
fc1ad971 | 1173 | let toks = fix_tokens toks in |
5626f154 | 1174 | toks in |
34e49164 | 1175 | |
ae4735db | 1176 | (* in theory here could reparse and rework the ast! or |
34e49164 C |
1177 | * apply some SP. Not before cos julia may have generated |
1178 | * not parsable file. Need do unparsing_tricks call before being | |
1179 | * ready to reparse. *) | |
1180 | print_all_tokens2 pr toks; | |
1181 | ||
1182 | | PPviastr -> pr str | |
1183 | ) | |
1184 | ) | |
1185 | ||
ae4735db | 1186 | let pp_program a b = |
34e49164 C |
1187 | Common.profile_code "C unparsing" (fun () -> pp_program2 a b) |
1188 | ||
485bce71 | 1189 | |
ae4735db | 1190 | let pp_program_default xs outfile = |
485bce71 C |
1191 | let xs' = xs +> List.map (fun x -> x, PPnormal) in |
1192 | pp_program xs' outfile |