Commit | Line | Data |
---|---|---|
0708f913 C |
1 | (* Yoann Padioleau, Julia Lawall |
2 | * | |
3 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU | |
34e49164 C |
4 | * |
5 | * This program is free software; you can redistribute it and/or | |
6 | * modify it under the terms of the GNU General Public License (GPL) | |
7 | * version 2 as published by the Free Software Foundation. | |
8 | * | |
9 | * This program is distributed in the hope that it will be useful, | |
10 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 | * file license.txt for more details. | |
485bce71 C |
13 | * |
14 | * | |
15 | * Modifications by Julia Lawall for better newline handling. | |
34e49164 C |
16 | *) |
17 | open Common | |
18 | ||
19 | open Ast_c | |
20 | ||
21 | module TH = Token_helpers | |
22 | ||
23 | ||
b1b2de81 C |
24 | (* should keep comments and directives in between adjacent deleted terms, |
25 | but not comments and directives within deleted terms. should use the | |
26 | labels found in the control-flow graph *) | |
27 | ||
34e49164 C |
28 | |
29 | ||
708f4980 C |
30 | (*****************************************************************************) |
31 | (* Wrappers *) | |
32 | (*****************************************************************************) | |
33 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_unparsing | |
34 | ||
34e49164 C |
35 | (*****************************************************************************) |
36 | (* Types used during the intermediate phases of the unparsing *) | |
37 | (*****************************************************************************) | |
38 | ||
39 | type token1 = | |
40 | | Fake1 of info | |
41 | | T1 of Parser_c.token | |
42 | ||
43 | (* The cocci_tag of the token should always be a NOTHING. The mark of | |
44 | * the token can only be OriginTok or ExpandedTok. Why not get rid of | |
45 | * token and get something simpler ? because we need to know if the | |
46 | * info is a TCommentCpp or TCommentSpace, etc for some of the further | |
47 | * analysis so easier to keep with the token. | |
48 | * | |
49 | * This type contains the whole information. Have all the tokens with this | |
50 | * type. | |
51 | *) | |
708f4980 C |
52 | type min = |
53 | Min of (int list (* match numbers *) * int (* adjacency information *)) | |
54 | | Ctx | |
55 | ||
34e49164 | 56 | type token2 = |
708f4980 | 57 | | T2 of Parser_c.token * min * |
34e49164 C |
58 | int option (* orig index, abstracting away comments and space *) |
59 | | Fake2 | |
708f4980 | 60 | | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *) |
34e49164 C |
61 | | C2 of string |
62 | | Indent_cocci2 | |
63 | | Unindent_cocci2 | |
64 | ||
65 | (* not used yet *) | |
66 | type token3 = | |
67 | | T3 of Parser_c.token | |
68 | | Cocci3 of string | |
69 | | C3 of string | |
70 | ||
71 | ||
72 | (* similar to the tech in parsing_hack *) | |
73 | type token_extended = { | |
74 | tok2 : token2; | |
75 | str : string; | |
76 | idx: int option; (* to know if 2 tokens were consecutive in orig file *) | |
77 | mutable new_tokens_before : token2 list; | |
78 | mutable remove : bool; | |
79 | } | |
80 | ||
81 | ||
82 | (*****************************************************************************) | |
83 | (* Helpers *) | |
84 | (*****************************************************************************) | |
85 | ||
86 | let info_of_token1 t = | |
87 | match t with | |
88 | | Fake1 info -> info | |
89 | | T1 tok -> TH.info_of_tok tok | |
90 | ||
002099fc C |
91 | let print_token1 = function |
92 | T1 tok -> TH.str_of_tok tok | |
93 | | Fake1 info -> "fake" | |
94 | ||
34e49164 C |
95 | let str_of_token2 = function |
96 | | T2 (t,_,_) -> TH.str_of_tok t | |
97 | | Fake2 -> "" | |
708f4980 | 98 | | Cocci2 (s,_,_,_) -> s |
34e49164 C |
99 | | C2 s -> s |
100 | | Indent_cocci2 -> "" | |
101 | | Unindent_cocci2 -> "" | |
102 | ||
103 | let print_token2 = function | |
708f4980 C |
104 | | T2 (t,b,_) -> |
105 | let b_str = | |
106 | match b with | |
107 | Min (index,adj) -> | |
108 | Printf.sprintf "-%d[%s]" adj | |
109 | (String.concat " " (List.map string_of_int index)) | |
110 | | Ctx -> "" in | |
111 | "T2:"^b_str^TH.str_of_tok t | |
fc1ad971 | 112 | | Fake2 -> "fake" |
708f4980 | 113 | | Cocci2 (s,_,lc,rc) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s |
34e49164 C |
114 | | C2 s -> "C2:"^s |
115 | | Indent_cocci2 -> "Indent" | |
116 | | Unindent_cocci2 -> "Unindent" | |
117 | ||
002099fc C |
118 | let simple_print_all_tokens1 l = |
119 | List.iter (function x -> Printf.printf "%s " (print_token1 x)) l; | |
120 | Printf.printf "\n" | |
121 | ||
708f4980 | 122 | let simple_print_all_tokens2 l = |
fc1ad971 | 123 | List.iter (function x -> Printf.printf "|%s| " (print_token2 x)) l; |
34e49164 C |
124 | Printf.printf "\n" |
125 | ||
126 | let str_of_token3 = function | |
127 | | T3 t -> TH.str_of_tok t | |
128 | | Cocci3 s | C3 s -> s | |
129 | ||
130 | ||
131 | ||
132 | let mk_token_extended x = | |
133 | let origidx = | |
134 | match x with | |
135 | | T2 (_,_, idx) -> idx | |
136 | | _ -> None | |
137 | in | |
138 | { tok2 = x; | |
139 | str = str_of_token2 x; | |
140 | idx = origidx; | |
141 | new_tokens_before = []; | |
142 | remove = false; | |
143 | } | |
144 | ||
145 | let rebuild_tokens_extented toks_ext = | |
146 | let _tokens = ref [] in | |
147 | toks_ext +> List.iter (fun tok -> | |
148 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); | |
149 | if not tok.remove then push2 tok.tok2 _tokens; | |
150 | ); | |
151 | let tokens = List.rev !_tokens in | |
152 | (tokens +> List.map mk_token_extended) | |
153 | ||
154 | ||
155 | let mcode_contain_plus = function | |
156 | | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false | |
157 | | Ast_cocci.CONTEXT _ -> true | |
485bce71 | 158 | (* patch: when need full coccinelle transformation *) |
708f4980 C |
159 | | Ast_cocci.MINUS (_,_,_,[]) -> false |
160 | | Ast_cocci.MINUS (_,_,_,x::xs) -> true | |
951c7801 | 161 | | Ast_cocci.PLUS _ -> raise Impossible |
34e49164 C |
162 | |
163 | let contain_plus info = | |
164 | let mck = Ast_c.mcode_of_info info in | |
165 | mcode_contain_plus mck | |
166 | ||
167 | (*****************************************************************************) | |
168 | (* Last fix on the ast *) | |
169 | (*****************************************************************************) | |
170 | ||
171 | (* Because of the ugly trick to handle initialiser, I generate fake ',' | |
172 | * for the last initializer element, but if there is nothing around it, | |
173 | * I don't want in the end to print it. | |
174 | *) | |
175 | ||
176 | let remove_useless_fakeInfo_struct program = | |
177 | let bigf = { Visitor_c.default_visitor_c_s with | |
178 | Visitor_c.kini_s = (fun (k,bigf) ini -> | |
179 | match k ini with | |
113803cf | 180 | | InitList args, ii -> |
34e49164 C |
181 | (match ii with |
182 | | [_i1;_i2] -> ini | |
113803cf | 183 | | [i1;i2;iicommaopt] -> |
34e49164 C |
184 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) |
185 | && (Ast_c.is_fake iicommaopt) | |
186 | (* sometimes the guy put a normal iicommaopt *) | |
34e49164 C |
187 | then InitList args, [i1;i2] |
188 | else InitList args, [i1;i2;iicommaopt] | |
113803cf C |
189 | | [i1;i2;iicommaopt;end_comma_opt] -> |
190 | (* only in #define. end_comma_opt canot be fake *) | |
191 | (* not sure if this will be considered ambiguous with a previous | |
192 | case? *) | |
193 | if (not (contain_plus iicommaopt)) && (not (contain_plus i2)) | |
194 | && (Ast_c.is_fake iicommaopt) | |
195 | (* sometimes the guy put a normal iicommaopt *) | |
196 | then InitList args, [i1;i2;end_comma_opt] | |
197 | else InitList args, [i1;i2;iicommaopt;end_comma_opt] | |
34e49164 C |
198 | | _ -> raise Impossible |
199 | ) | |
200 | | x -> x | |
201 | ) | |
202 | } in | |
203 | Visitor_c.vk_toplevel_s bigf program | |
204 | ||
205 | ||
206 | (*****************************************************************************) | |
207 | (* Tokens1 generation *) | |
208 | (*****************************************************************************) | |
209 | ||
210 | let get_fakeInfo_and_tokens celem toks = | |
211 | let toks_in = ref toks in | |
212 | let toks_out = ref [] in | |
213 | ||
214 | (* todo? verify good order of position ? *) | |
215 | let pr_elem info = | |
216 | match Ast_c.pinfo_of_info info with | |
217 | | FakeTok _ -> | |
218 | Common.push2 (Fake1 info) toks_out | |
219 | | OriginTok _ | ExpandedTok _ -> | |
220 | (* get the associated comments/space/cppcomment tokens *) | |
978fd7e5 C |
221 | let (before, x, after) = |
222 | !toks_in +> Common.split_when (fun tok -> | |
34e49164 C |
223 | info =*= TH.info_of_tok tok) |
224 | in | |
b1b2de81 | 225 | assert(info =*= TH.info_of_tok x); |
34e49164 C |
226 | (*old: assert(before +> List.for_all (TH.is_comment)); *) |
227 | before +> List.iter (fun x -> | |
228 | if not (TH.is_comment x) | |
0708f913 | 229 | then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x) |
34e49164 C |
230 | (* case such as int asm d3("x"); not yet in ast *) |
231 | ); | |
232 | before +> List.iter (fun x -> Common.push2 (T1 x) toks_out); | |
233 | push2 (T1 x) toks_out; | |
234 | toks_in := after; | |
235 | | AbstractLineTok _ -> | |
236 | (* can be called on type info when for instance use -type_c *) | |
237 | if !Flag_parsing_c.pretty_print_type_info | |
238 | then Common.push2 (Fake1 info) toks_out | |
239 | else raise Impossible (* at this stage *) | |
240 | in | |
241 | ||
242 | let pr_space _ = () in (* use the spacing that is there already *) | |
243 | ||
244 | Pretty_print_c.pp_program_gen pr_elem pr_space celem; | |
245 | ||
246 | if not (null !toks_in) | |
0708f913 | 247 | then failwith "WEIRD: unparsing not finished"; |
34e49164 C |
248 | |
249 | List.rev !toks_out | |
250 | ||
251 | (* Fake nodes that have BEFORE code should be moved over any subsequent | |
252 | whitespace and newlines, but not any comments, to get as close to the affected | |
002099fc | 253 | code as possible. Similarly, fake nodes that have AFTER code should be moved |
34e49164 C |
254 | backwards. No fake nodes should have both before and after code. *) |
255 | ||
256 | let displace_fake_nodes toks = | |
257 | let is_fake = function Fake1 _ -> true | _ -> false in | |
258 | let is_whitespace = function | |
259 | T1(Parser_c.TCommentSpace _) | |
485bce71 | 260 | (* patch: cocci *) |
34e49164 C |
261 | | T1(Parser_c.TCommentNewline _) -> true |
262 | | _ -> false in | |
263 | let rec loop toks = | |
264 | let fake_info = | |
265 | try Some (Common.split_when is_fake toks) | |
266 | with Not_found -> None in | |
267 | match fake_info with | |
268 | Some(bef,((Fake1 info) as fake),aft) -> | |
269 | (match !(info.cocci_tag) with | |
708f4980 C |
270 | | Some x -> |
271 | (match x with | |
002099fc | 272 | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) -> |
34e49164 | 273 | (* move the fake node forwards *) |
002099fc C |
274 | let (whitespace,rest) = Common.span is_whitespace aft in |
275 | bef @ whitespace @ fake :: (loop rest) | |
276 | | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) -> | |
34e49164 | 277 | (* move the fake node backwards *) |
002099fc C |
278 | let revbef = List.rev bef in |
279 | let (revwhitespace,revprev) = Common.span is_whitespace revbef in | |
280 | let whitespace = List.rev revwhitespace in | |
281 | let prev = List.rev revprev in | |
282 | prev @ fake :: (loop (whitespace @ aft)) | |
283 | | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) -> | |
284 | bef @ fake :: (loop aft) | |
285 | | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) -> | |
286 | failwith "fake node should not be before-after" | |
287 | | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *) | |
485bce71 | 288 | ) |
708f4980 C |
289 | | None -> |
290 | bef @ fake :: (loop aft) | |
291 | ) | |
34e49164 C |
292 | | None -> toks |
293 | | _ -> raise Impossible in | |
294 | loop toks | |
295 | ||
296 | (*****************************************************************************) | |
297 | (* Tokens2 generation *) | |
298 | (*****************************************************************************) | |
299 | ||
b1b2de81 C |
300 | let comment2t2 = function |
301 | (Token_c.TCommentCpp x,(info : Token_c.info)) -> | |
302 | C2("\n"^info.Common.str^"\n") | |
303 | | x -> failwith (Printf.sprintf "unexpected comment %s" (Common.dump x)) | |
304 | ||
34e49164 C |
305 | let expand_mcode toks = |
306 | let toks_out = ref [] in | |
307 | ||
308 | let index = ref 0 in | |
309 | ||
310 | let add_elem t minus = | |
311 | match t with | |
312 | | Fake1 info -> | |
313 | let str = Ast_c.str_of_info info in | |
b1b2de81 | 314 | if str =$= "" |
34e49164 C |
315 | then push2 (Fake2) toks_out |
316 | (* perhaps the fake ',' *) | |
317 | else push2 (C2 str) toks_out | |
318 | ||
319 | ||
002099fc | 320 | | T1 tok -> |
b1b2de81 | 321 | (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*) |
34e49164 | 322 | (* no tag on expandedTok ! *) |
fc1ad971 C |
323 | let modified = function |
324 | None -> false | |
325 | | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false | |
326 | | _ -> true in | |
327 | (if (TH.is_expanded tok && | |
328 | modified !((TH.info_of_tok tok).cocci_tag) | |
329 | (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)) | |
0708f913 C |
330 | then |
331 | failwith | |
332 | (Printf.sprintf | |
333 | "expanded token %s on line %d is either modified or stored in a metavariable" | |
334 | (TH.str_of_tok tok) (TH.line_of_tok tok))); | |
34e49164 C |
335 | |
336 | let tok' = tok +> TH.visitor_info_of_tok (fun i -> | |
337 | { i with cocci_tag = ref Ast_c.emptyAnnot; } | |
338 | ) in | |
339 | ||
340 | let optindex = | |
341 | if TH.is_origin tok && not (TH.is_real_comment tok) | |
342 | then begin | |
343 | incr index; | |
344 | Some !index | |
345 | end | |
346 | else None | |
347 | in | |
348 | ||
349 | push2 (T2 (tok', minus, optindex)) toks_out | |
350 | in | |
351 | ||
352 | let expand_info t = | |
708f4980 C |
353 | let (mcode,env) = |
354 | Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).cocci_tag) in | |
34e49164 | 355 | |
708f4980 C |
356 | let pr_cocci s ln col rcol = |
357 | push2 (Cocci2(s,ln,col,rcol)) toks_out in | |
34e49164 | 358 | let pr_c info = |
b1b2de81 C |
359 | (match Ast_c.pinfo_of_info info with |
360 | Ast_c.AbstractLineTok _ -> | |
361 | push2 (C2 (Ast_c.str_of_info info)) toks_out | |
362 | | Ast_c.FakeTok (s,_) -> | |
363 | push2 (C2 s) toks_out | |
34e49164 C |
364 | | _ -> |
365 | Printf.printf "line: %s\n" (Common.dump info); | |
b1b2de81 C |
366 | failwith "not an abstract line"); |
367 | (!(info.Ast_c.comments_tag)).Ast_c.mafter +> | |
368 | List.iter (fun x -> Common.push2 (comment2t2 x) toks_out) in | |
369 | ||
708f4980 C |
370 | let pr_barrier ln col = (* marks a position, used around C code *) |
371 | push2 (Cocci2("",ln,col,col)) toks_out in | |
372 | let pr_nobarrier ln col = () in (* not needed for linux spacing *) | |
b1b2de81 | 373 | |
708f4980 | 374 | let pr_cspace _ = push2 (C2 " ") toks_out in |
34e49164 | 375 | |
708f4980 C |
376 | let pr_space _ = () (* rely on add_space in cocci code *) in |
377 | let pr_arity _ = () (* not interested *) in | |
34e49164 C |
378 | |
379 | let indent _ = push2 Indent_cocci2 toks_out in | |
380 | let unindent _ = push2 Unindent_cocci2 toks_out in | |
381 | ||
708f4980 C |
382 | let args_pp = |
383 | (env, pr_cocci, pr_c, pr_cspace, | |
384 | (match !Flag_parsing_c.spacing with | |
385 | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace), | |
386 | pr_arity, | |
387 | (match !Flag_parsing_c.spacing with | |
388 | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier), | |
389 | indent, unindent) in | |
34e49164 | 390 | |
485bce71 C |
391 | (* old: when for yacfe with partial cocci: |
392 | * add_elem t false; | |
393 | *) | |
34e49164 | 394 | |
485bce71 | 395 | (* patch: when need full coccinelle transformation *) |
faf9a90c | 396 | let unparser = Unparse_cocci.pp_list_list_any args_pp false in |
34e49164 | 397 | match mcode with |
708f4980 | 398 | | Ast_cocci.MINUS (_,inst,adj,any_xxs) -> |
34e49164 C |
399 | (* Why adding ? because I want to have all the information, the whole |
400 | * set of tokens, so I can then process and remove the | |
401 | * is_between_two_minus for instance *) | |
708f4980 | 402 | add_elem t (Min (inst,adj)); |
faf9a90c | 403 | unparser any_xxs Unparse_cocci.InPlace |
34e49164 C |
404 | | Ast_cocci.CONTEXT (_,any_befaft) -> |
405 | (match any_befaft with | |
406 | | Ast_cocci.NOTHING -> | |
708f4980 | 407 | add_elem t Ctx |
951c7801 | 408 | | Ast_cocci.BEFORE (xxs,_) -> |
faf9a90c | 409 | unparser xxs Unparse_cocci.Before; |
708f4980 | 410 | add_elem t Ctx |
951c7801 | 411 | | Ast_cocci.AFTER (xxs,_) -> |
708f4980 | 412 | add_elem t Ctx; |
faf9a90c | 413 | unparser xxs Unparse_cocci.After; |
951c7801 | 414 | | Ast_cocci.BEFOREAFTER (xxs, yys, _) -> |
faf9a90c | 415 | unparser xxs Unparse_cocci.Before; |
708f4980 | 416 | add_elem t Ctx; |
faf9a90c | 417 | unparser yys Unparse_cocci.After; |
34e49164 | 418 | ) |
951c7801 | 419 | | Ast_cocci.PLUS _ -> raise Impossible |
34e49164 C |
420 | |
421 | in | |
422 | ||
423 | toks +> List.iter expand_info; | |
424 | List.rev !toks_out | |
425 | ||
426 | ||
427 | (*****************************************************************************) | |
428 | (* Tokens2 processing, filtering, adjusting *) | |
429 | (*****************************************************************************) | |
430 | ||
002099fc | 431 | let is_space = function |
fc1ad971 C |
432 | | T2(Parser_c.TCommentSpace _,_b,_i) -> true (* only whitespace *) |
433 | | _ -> false | |
434 | ||
435 | let is_newline = function | |
436 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
437 | | _ -> false | |
438 | ||
439 | let is_whitespace = function | |
002099fc C |
440 | | (T2 (t,_b,_i)) -> |
441 | (match t with | |
442 | | Parser_c.TCommentSpace _ -> true (* only whitespace *) | |
fc1ad971 | 443 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
002099fc C |
444 | | _ -> false |
445 | ) | |
446 | | _ -> false | |
447 | ||
34e49164 | 448 | let is_minusable_comment = function |
b1b2de81 | 449 | | (T2 (t,_b,_i)) -> |
34e49164 C |
450 | (match t with |
451 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
485bce71 | 452 | (* patch: coccinelle *) |
002099fc | 453 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true |
7f004419 | 454 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false |
002099fc C |
455 | | Parser_c.TComment _ |
456 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
b1b2de81 C |
457 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
458 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) (* result was false *) | |
34e49164 C |
459 | -> true |
460 | ||
b1b2de81 | 461 | | Parser_c.TCommentMisc _ |
0708f913 | 462 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) |
34e49164 C |
463 | -> false |
464 | ||
465 | | _ -> false | |
466 | ) | |
467 | | _ -> false | |
468 | ||
fc1ad971 C |
469 | let is_minusable_comment_nocpp = function |
470 | | (T2 (t,_b,_i)) -> | |
471 | (match t with | |
472 | | Parser_c.TCommentSpace _ (* only whitespace *) | |
473 | (* patch: coccinelle *) | |
474 | | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true | |
7f004419 | 475 | | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false |
fc1ad971 C |
476 | | Parser_c.TComment _ -> true |
477 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) | |
478 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) | |
479 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
480 | -> false | |
481 | ||
482 | | Parser_c.TCommentMisc _ | |
483 | | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) | |
484 | -> false | |
485 | ||
486 | | _ -> false | |
487 | ) | |
488 | | _ -> false | |
489 | ||
34e49164 C |
490 | let all_coccis = function |
491 | Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true | |
492 | | _ -> false | |
493 | ||
b1b2de81 C |
494 | (*previously gave up if the first character was a newline, but not clear why*) |
495 | let is_minusable_comment_or_plus x = is_minusable_comment x or all_coccis x | |
34e49164 | 496 | |
708f4980 C |
497 | let set_minus_comment adj = function |
498 | | T2 (t,Ctx,idx) -> | |
34e49164 C |
499 | let str = TH.str_of_tok t in |
500 | (match t with | |
501 | | Parser_c.TCommentSpace _ | |
485bce71 | 502 | (* patch: coccinelle *) |
34e49164 C |
503 | | Parser_c.TCommentNewline _ -> () |
504 | ||
505 | | Parser_c.TComment _ | |
0708f913 | 506 | | Parser_c.TCommentCpp (Token_c.CppAttr, _) |
b1b2de81 C |
507 | | Parser_c.TCommentCpp (Token_c.CppMacro, _) |
508 | | Parser_c.TCommentCpp (Token_c.CppDirective, _) | |
34e49164 | 509 | -> |
b1b2de81 C |
510 | pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s" |
511 | (TH.line_of_tok t) str) | |
34e49164 C |
512 | | _ -> raise Impossible |
513 | ); | |
708f4980 | 514 | T2 (t, Min adj, idx) |
485bce71 | 515 | (* patch: coccinelle *) |
fc1ad971 | 516 | | T2 (t,Min adj,idx) as x -> x |
34e49164 C |
517 | | _ -> raise Impossible |
518 | ||
708f4980 | 519 | let set_minus_comment_or_plus adj = function |
34e49164 | 520 | Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x |
708f4980 | 521 | | x -> set_minus_comment adj x |
34e49164 | 522 | |
fc1ad971 C |
523 | let drop_minus xs = |
524 | xs +> Common.exclude (function | |
525 | | T2 (t,Min adj,_) -> true | |
526 | | _ -> false | |
527 | ) | |
528 | ||
34e49164 C |
529 | let remove_minus_and_between_and_expanded_and_fake xs = |
530 | ||
531 | (* get rid of exampled and fake tok *) | |
532 | let xs = xs +> Common.exclude (function | |
533 | | T2 (t,_,_) when TH.is_expanded t -> true | |
534 | | Fake2 -> true | |
535 | ||
536 | | _ -> false | |
537 | ) | |
538 | in | |
539 | ||
113803cf | 540 | let minus_or_comment = function |
708f4980 | 541 | T2(_,Min adj,_) -> true |
113803cf C |
542 | | x -> is_minusable_comment x in |
543 | ||
fc1ad971 C |
544 | let minus_or_comment_nocpp = function |
545 | T2(_,Min adj,_) -> true | |
546 | | x -> is_minusable_comment_nocpp x in | |
547 | ||
548 | let common_adj (index1,adj1) (index2,adj2) = | |
549 | adj1 = adj2 (* same adjacency info *) && | |
550 | (* non-empty intersection of witness trees *) | |
551 | not ((Common.inter_set index1 index2) = []) in | |
552 | ||
553 | let rec adjust_around_minus = function | |
554 | [] -> [] | |
555 | | (T2(Parser_c.TCommentNewline c,_b,_i) as x):: | |
556 | (((T2(_,Min adj,_))::_) as rest) -> | |
557 | (* an initial newline, as in a replaced statement *) | |
558 | let (between_minus,rest) = Common.span minus_or_comment rest in | |
559 | (match rest with | |
560 | [] -> (set_minus_comment adj x) :: | |
561 | (List.map (set_minus_comment adj) between_minus) | |
562 | | T2(_,Ctx,_)::_ when is_newline (List.hd(List.rev between_minus)) -> | |
563 | (set_minus_comment adj x)::(adjust_within_minus between_minus) @ | |
564 | (adjust_around_minus rest) | |
565 | | _ -> | |
566 | x :: (adjust_within_minus between_minus) @ | |
567 | (adjust_around_minus rest)) | |
568 | | ((T2(_,Min adj,_))::_) as rest -> | |
569 | (* no initial newline, as in a replaced expression *) | |
570 | let (between_minus,rest) = Common.span minus_or_comment rest in | |
571 | (match rest with | |
572 | [] -> | |
573 | (List.map (set_minus_comment adj) between_minus) | |
574 | | _ -> | |
575 | (adjust_within_minus between_minus) @ | |
576 | (adjust_around_minus rest)) | |
577 | | x::xs -> x::adjust_around_minus xs | |
578 | and adjust_within_minus = function | |
579 | [] -> [] | |
580 | | (T2(_,Min adj1,_) as t1)::xs -> | |
581 | let (between_minus,rest) = Common.span is_minusable_comment xs in | |
582 | (match rest with | |
583 | [] -> | |
584 | (* keep last newline *) | |
585 | let (drop,keep) = | |
586 | try | |
587 | let (drop,nl,keep) = | |
588 | Common.split_when is_newline between_minus in | |
589 | (drop, nl :: keep) | |
590 | with Not_found -> (between_minus,[]) in | |
591 | t1 :: | |
592 | List.map (set_minus_comment_or_plus adj1) drop @ | |
593 | keep | |
594 | | (T2(_,Min adj2,_) as t2)::rest when common_adj adj1 adj2 -> | |
595 | t1:: | |
596 | List.map (set_minus_comment_or_plus adj1) between_minus @ | |
597 | adjust_within_minus (t2::rest) | |
598 | | x::xs -> | |
599 | t1::(between_minus @ adjust_within_minus (x::xs))) | |
600 | | _ -> failwith "only minus and space possible" in | |
601 | ||
602 | (* new idea: collects regions not containing non-space context code | |
603 | if two adjacent adjacent minus tokens satisfy common_adj then delete | |
604 | all spaces, comments etc between them | |
605 | if two adjacent minus tokens do not satisfy common_adj only delete | |
606 | the spaces between them if there are no comments, etc. | |
607 | if the region contain no plus code and is both preceded and followed | |
608 | by a newline, delete the initial newline. *) | |
609 | ||
610 | let rec adjust_around_minus = function | |
34e49164 | 611 | [] -> [] |
708f4980 | 612 | | (T2(Parser_c.TCommentNewline c,_b,_i) as x):: |
fc1ad971 C |
613 | (T2(_,Min adj1,_) as t1)::xs -> |
614 | let (minus_list,rest) = Common.span not_context (t1::xs) in | |
615 | let contains_plus = List.exists is_plus minus_list in | |
616 | let x = | |
617 | match List.rev minus_list with | |
618 | (T2(Parser_c.TCommentNewline c,_b,_i))::rest | |
619 | when List.for_all minus_or_comment minus_list -> | |
620 | set_minus_comment_or_plus adj1 x | |
621 | | _ -> x in | |
622 | x :: adjust_within_minus contains_plus minus_list @ | |
623 | adjust_around_minus rest | |
624 | | (T2(_,Min adj1,_) as t1)::xs -> | |
625 | let (minus_list,rest) = Common.span not_context (t1::xs) in | |
626 | let contains_plus = List.exists is_plus minus_list in | |
627 | adjust_within_minus contains_plus minus_list @ adjust_around_minus rest | |
628 | | x::xs -> x :: adjust_around_minus xs | |
629 | and adjust_within_minus cp (* contains plus *) = function | |
630 | (T2(_,Min adj1,_) as t1)::xs -> | |
631 | let not_minus = function T2(_,Min _,_) -> false | _ -> true in | |
632 | let (not_minus_list,rest) = Common.span not_minus xs in | |
633 | t1 :: | |
34e49164 | 634 | (match rest with |
fc1ad971 C |
635 | (T2(_,Min adj2,_) as t2)::xs when common_adj adj1 adj2 -> |
636 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) | |
637 | @ (adjust_within_minus cp (t2::xs)) | |
638 | | (T2(_,Min adj2,_) as t2)::xs -> | |
639 | let is_whitespace_or_plus = function | |
640 | (T2 _) as x -> is_space x | |
641 | | _ -> true (*plus*) in | |
642 | if List.for_all is_whitespace_or_plus not_minus_list | |
643 | then | |
644 | (List.map (set_minus_comment_or_plus adj1) not_minus_list) | |
645 | @ (adjust_within_minus cp (t2::xs)) | |
646 | else not_minus_list @ (adjust_within_minus cp (t2::xs)) | |
647 | | _ -> | |
648 | if cp | |
649 | then xs | |
650 | else | |
651 | let (spaces,rest) = Common.span is_space xs in | |
652 | (List.map (set_minus_comment_or_plus adj1) spaces) | |
653 | @ rest) | |
654 | | xs -> failwith "should always start with minus" | |
655 | and not_context = function | |
656 | (T2(_,Ctx,_) as x) when not (is_minusable_comment x) -> false | |
657 | | _ -> true | |
658 | and is_plus = function | |
659 | C2 _ | Cocci2 _ -> true | |
660 | | _ -> false in | |
34e49164 | 661 | |
fc1ad971 | 662 | let xs = adjust_around_minus xs in |
34e49164 | 663 | |
113803cf | 664 | (* this drops blank lines after a brace introduced by removing code *) |
fc1ad971 C |
665 | let minus_or_comment_nonl = function |
666 | T2(_,Min adj,_) -> true | |
667 | | T2(Parser_c.TCommentNewline _,_b,_i) -> false | |
668 | | x -> is_minusable_comment x in | |
669 | ||
113803cf C |
670 | let rec adjust_after_brace = function |
671 | [] -> [] | |
708f4980 | 672 | | ((T2(_,Ctx,_)) as x)::((T2(_,Min adj,_)::_) as xs) |
b1b2de81 | 673 | when str_of_token2 x =$= "{" -> |
fc1ad971 | 674 | let (between_minus,rest) = Common.span minus_or_comment_nonl xs in |
113803cf C |
675 | let is_whitespace = function |
676 | T2(Parser_c.TCommentSpace _,_b,_i) | |
677 | (* patch: cocci *) | |
678 | | T2(Parser_c.TCommentNewline _,_b,_i) -> true | |
679 | | _ -> false in | |
680 | let (newlines,rest) = Common.span is_whitespace rest in | |
681 | let (drop_newlines,last_newline) = | |
682 | let rec loop = function | |
683 | [] -> ([],[]) | |
684 | | ((T2(Parser_c.TCommentNewline _,_b,_i)) as x) :: rest -> | |
685 | (List.rev rest,[x]) | |
686 | | x::xs -> | |
687 | let (drop_newlines,last_newline) = loop xs in | |
688 | (drop_newlines,x::last_newline) in | |
689 | loop (List.rev newlines) in | |
708f4980 | 690 | x::between_minus@(List.map (set_minus_comment adj) drop_newlines)@ |
113803cf C |
691 | last_newline@ |
692 | adjust_after_brace rest | |
693 | | x::xs -> x::adjust_after_brace xs in | |
694 | ||
695 | let xs = adjust_after_brace xs in | |
696 | ||
fc1ad971 C |
697 | (* search backwards from context } over spaces until reaching a newline. |
698 | then go back over all minus code until reaching some context or + code. | |
699 | get rid of all intervening spaces, newlines, and comments | |
700 | input is reversed *) | |
701 | let rec adjust_before_brace = function | |
702 | [] -> [] | |
703 | | ((T2(t,Ctx,_)) as x)::xs when str_of_token2 x =$= "}" or is_newline x -> | |
704 | let (outer_spaces,rest) = Common.span is_space xs in | |
705 | x :: outer_spaces @ | |
706 | (match rest with | |
707 | ((T2 (Parser_c.TCommentNewline _,Ctx,_i)) as h) :: | |
708 | (* the rest of this code is the same as from_newline below | |
709 | but merging them seems to be error prone... *) | |
710 | ((T2 (t, Min adj, idx)) as m) :: rest -> | |
711 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
712 | h :: m :: | |
713 | (List.map (set_minus_comment adj) spaces) @ | |
714 | (adjust_before_brace rest) | |
715 | | _ -> adjust_before_brace rest) | |
716 | | x::xs -> x :: (adjust_before_brace xs) in | |
717 | ||
718 | let from_newline = function | |
719 | ((T2 (t, Min adj, idx)) as m) :: rest -> | |
720 | let (spaces,rest) = Common.span minus_or_comment_nocpp rest in | |
721 | m :: | |
722 | (List.map (set_minus_comment adj) spaces) @ | |
723 | (adjust_before_brace rest) | |
724 | | rest -> adjust_before_brace rest in | |
725 | ||
726 | let xs = List.rev (from_newline (List.rev xs)) in | |
727 | let xs = drop_minus xs in | |
34e49164 C |
728 | xs |
729 | ||
faf9a90c C |
730 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
731 | let adjust_before_semicolon toks = | |
732 | let toks = List.rev toks in | |
002099fc | 733 | let rec search_semic = function |
faf9a90c | 734 | [] -> [] |
002099fc | 735 | | ((T2(_,Ctx,_)) as x)::xs | ((Cocci2 _) as x)::xs -> |
faf9a90c | 736 | if List.mem (str_of_token2 x) [";";")";","] |
002099fc C |
737 | then x :: search_minus false xs |
738 | else x :: search_semic xs | |
739 | | x::xs -> x :: search_semic xs | |
740 | and search_minus seen_minus xs = | |
741 | let (spaces, rest) = Common.span is_space xs in | |
742 | (* only delete spaces if something is actually deleted *) | |
743 | match rest with | |
744 | ((T2(_,Min _,_)) as a)::rerest -> a :: search_minus true rerest | |
745 | | _ -> if seen_minus then rest else xs in | |
746 | List.rev (search_semic toks) | |
34e49164 C |
747 | |
748 | let is_ident_like s = s ==~ Common.regexp_alpha | |
749 | ||
750 | let rec add_space xs = | |
751 | match xs with | |
752 | | [] -> [] | |
753 | | [x] -> [x] | |
708f4980 C |
754 | | (Cocci2(sx,lnx,_,rcolx) as x)::((Cocci2(sy,lny,lcoly,_)) as y)::xs |
755 | when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL && | |
756 | not (lnx = -1) && lnx = lny && not (rcolx = -1) && rcolx < lcoly -> | |
757 | (* this only works within a line. could consider whether | |
758 | something should be done to add newlines too, rather than | |
759 | printing them explicitly in unparse_cocci. *) | |
760 | x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs) | |
34e49164 C |
761 | | x::y::xs -> |
762 | let sx = str_of_token2 x in | |
763 | let sy = str_of_token2 y in | |
faf9a90c | 764 | if is_ident_like sx && is_ident_like sy |
34e49164 C |
765 | then x::C2 " "::(add_space (y::xs)) |
766 | else x::(add_space (y::xs)) | |
767 | ||
768 | ||
769 | ||
770 | (* When insert some new code, because of a + in a SP, we must add this | |
771 | * code at the right place, with the good indentation. So each time we | |
772 | * encounter some spacing info, with some newline, we maintain the | |
773 | * current indentation level used. | |
774 | * | |
775 | * TODO problems: not accurate. ex: TODO | |
776 | * | |
777 | * TODO: if in #define region, should add a \ \n | |
778 | *) | |
779 | let new_tabbing2 space = | |
780 | (list_of_string space) | |
781 | +> List.rev | |
b1b2de81 | 782 | +> Common.take_until (fun c -> c =<= '\n') |
34e49164 C |
783 | +> List.rev |
784 | +> List.map string_of_char | |
785 | +> String.concat "" | |
786 | ||
787 | let new_tabbing a = | |
788 | Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a) | |
789 | ||
790 | ||
791 | let rec adjust_indentation xs = | |
792 | let _current_tabbing = ref "" in | |
793 | let tabbing_unit = ref None in | |
794 | ||
795 | let string_of_list l = String.concat "" (List.map string_of_char l) in | |
796 | ||
797 | (* try to pick a tabbing unit for the plus code *) | |
798 | let adjust_tabbing_unit old_tab new_tab = | |
b1b2de81 | 799 | if !tabbing_unit =*= None && String.length new_tab > String.length old_tab |
34e49164 C |
800 | then |
801 | let old_tab = list_of_string old_tab in | |
802 | let new_tab = list_of_string new_tab in | |
803 | let rec loop = function | |
804 | ([],new_tab) -> | |
805 | tabbing_unit := Some(string_of_list new_tab,List.rev new_tab) | |
806 | | (_,[]) -> failwith "not possible" | |
807 | | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *) | |
808 | loop (old_tab,new_tab) in | |
809 | ||
810 | let remtab tu current_tab = | |
811 | let current_tab = List.rev(list_of_string current_tab) in | |
812 | let rec loop = function | |
813 | ([],new_tab) -> string_of_list (List.rev new_tab) | |
0708f913 | 814 | | (_,[]) -> "" (*weird; tabbing unit used up more than the current tab*) |
b1b2de81 | 815 | | (t::ts,n::ns) when t =<= n -> loop (ts,ns) |
34e49164 C |
816 | | (_,ns) -> (* mismatch; remove what we can *) |
817 | string_of_list (List.rev ns) in | |
818 | loop (tu,current_tab) in | |
819 | ||
820 | let rec find_first_tab started = function | |
821 | [] -> () | |
b1b2de81 | 822 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> |
34e49164 | 823 | find_first_tab true xs |
485bce71 | 824 | (* patch: coccinelle *) |
34e49164 C |
825 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_ |
826 | when started -> | |
827 | let s = str_of_token2 x +> new_tabbing in | |
828 | tabbing_unit := Some (s,List.rev (list_of_string s)) | |
829 | | x::xs -> find_first_tab started xs in | |
830 | find_first_tab false xs; | |
831 | ||
832 | let rec aux started xs = | |
833 | match xs with | |
834 | | [] -> [] | |
485bce71 | 835 | (* patch: coccinelle *) |
34e49164 | 836 | | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _)):: |
708f4980 C |
837 | ((Cocci2 ("{",_,_,_)) as a)::xs |
838 | when started && str_of_token2 x =$= ")" -> | |
34e49164 | 839 | (* to be done for if, etc, but not for a function header *) |
708f4980 | 840 | x::(C2 " ")::a::(aux started xs) |
34e49164 C |
841 | | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs -> |
842 | let old_tabbing = !_current_tabbing in | |
843 | str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s); | |
844 | (* only trust the indentation after the first { *) | |
845 | (if started then adjust_tabbing_unit old_tabbing !_current_tabbing); | |
846 | let coccis_rest = Common.span all_coccis xs in | |
847 | (match coccis_rest with | |
b1b2de81 | 848 | (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y =$= "}" -> |
34e49164 C |
849 | (* the case where cocci code has been added before a close } *) |
850 | x::aux started (Indent_cocci2::xs) | |
851 | | _ -> x::aux started xs) | |
852 | | Indent_cocci2::xs -> | |
853 | (match !tabbing_unit with | |
854 | None -> aux started xs | |
855 | | Some (tu,_) -> | |
856 | _current_tabbing := (!_current_tabbing)^tu; | |
708f4980 | 857 | Cocci2 (tu,-1,-1,-1)::aux started xs) |
34e49164 C |
858 | | Unindent_cocci2::xs -> |
859 | (match !tabbing_unit with | |
860 | None -> aux started xs | |
861 | | Some (_,tu) -> | |
862 | _current_tabbing := remtab tu (!_current_tabbing); | |
863 | aux started xs) | |
864 | (* border between existing code and cocci code *) | |
708f4980 | 865 | | ((T2 (tok,_,_)) as x)::((Cocci2("\n",_,_,_)) as y)::xs |
b1b2de81 | 866 | when str_of_token2 x =$= "{" -> |
34e49164 C |
867 | x::aux true (y::Indent_cocci2::xs) |
868 | | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs | |
b1b2de81 | 869 | when str_of_token2 y =$= "}" -> |
34e49164 C |
870 | x::aux started (y::Unindent_cocci2::xs) |
871 | (* starting the body of the function *) | |
b1b2de81 | 872 | | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x =$= "{" -> x::aux true xs |
708f4980 C |
873 | | ((Cocci2("{",_,_,_)) as a)::xs -> a::aux true xs |
874 | | ((Cocci2("\n",_,_,_)) as x)::xs -> | |
0708f913 | 875 | (* dont inline in expr because of weird eval order of ocaml *) |
34e49164 | 876 | let s = !_current_tabbing in |
708f4980 | 877 | x::Cocci2 (s,-1,-1,-1)::aux started xs |
34e49164 C |
878 | | x::xs -> x::aux started xs in |
879 | aux false xs | |
880 | ||
881 | ||
882 | let rec find_paren_comma = function | |
883 | | [] -> () | |
884 | ||
885 | (* do nothing if was like this in original file *) | |
886 | | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
b1b2de81 | 887 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
888 | find_paren_comma (x2::xs) |
889 | ||
890 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2) | |
b1b2de81 | 891 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
892 | find_paren_comma (x2::xs) |
893 | ||
894 | | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2) | |
b1b2de81 | 895 | ::xs when p2 =|= p1 + 1 -> |
34e49164 C |
896 | find_paren_comma (x2::xs) |
897 | ||
898 | (* otherwise yes can adjust *) | |
899 | | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs -> | |
900 | x2.remove <- true; | |
901 | find_paren_comma (x2::xs) | |
902 | | ({ str = "," } as x1)::({ str = ","} as x2)::xs -> | |
903 | x1.remove <- true; | |
904 | find_paren_comma (x2::xs) | |
905 | ||
906 | | ({ str = "," } as x1)::({ str = ")"} as x2)::xs -> | |
907 | x1.remove <- true; | |
908 | find_paren_comma (x2::xs) | |
909 | ||
910 | | x::xs -> | |
911 | find_paren_comma xs | |
912 | ||
913 | ||
914 | let fix_tokens toks = | |
915 | let toks = toks +> List.map mk_token_extended in | |
916 | ||
917 | let cleaner = toks +> Common.exclude (function | |
918 | | {tok2 = T2 (t,_,_)} -> TH.is_real_comment t (* I want the ifdef *) | |
919 | | _ -> false | |
920 | ) in | |
921 | find_paren_comma cleaner; | |
922 | ||
923 | let toks = rebuild_tokens_extented toks in | |
924 | toks +> List.map (fun x -> x.tok2) | |
925 | ||
926 | ||
927 | ||
928 | (*****************************************************************************) | |
929 | (* Final unparsing (and debugging support) *) | |
930 | (*****************************************************************************) | |
931 | ||
932 | (* for debugging *) | |
933 | type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin | |
934 | ||
935 | let kind_of_token2 = function | |
936 | | Fake2 -> KFake | |
937 | | Cocci2 _ -> KCocci | |
938 | | C2 _ -> KC | |
939 | | T2 (t,_,_) -> | |
940 | (match TH.pinfo_of_tok t with | |
941 | | ExpandedTok _ -> KExpanded | |
942 | | OriginTok _ -> KOrigin | |
943 | | FakeTok _ -> raise Impossible (* now a Fake2 *) | |
944 | | AbstractLineTok _ -> raise Impossible (* now a KC *) | |
945 | ) | |
946 | | Unindent_cocci2 | Indent_cocci2 -> raise Impossible | |
947 | ||
948 | let end_mark = "!" | |
949 | ||
950 | let start_mark = function | |
951 | | KFake -> "!F!" | |
952 | | KCocci -> "!S!" | |
953 | | KC -> "!A!" | |
954 | | KExpanded -> "!E!" | |
955 | | KOrigin -> "" | |
956 | ||
957 | let print_all_tokens2 pr xs = | |
958 | if !Flag_parsing_c.debug_unparsing | |
959 | then | |
960 | let current_kind = ref KOrigin in | |
961 | xs +> List.iter (fun t -> | |
962 | let newkind = kind_of_token2 t in | |
b1b2de81 | 963 | if newkind =*= !current_kind |
34e49164 C |
964 | then pr (str_of_token2 t) |
965 | else begin | |
966 | pr (end_mark); | |
967 | pr (start_mark newkind); | |
968 | pr (str_of_token2 t); | |
969 | current_kind := newkind | |
970 | end | |
971 | ); | |
972 | else | |
973 | xs +> List.iter (fun x -> pr (str_of_token2 x)) | |
974 | ||
975 | ||
976 | ||
977 | ||
978 | (*****************************************************************************) | |
979 | (* Entry points *) | |
980 | (*****************************************************************************) | |
981 | ||
982 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
983 | * complete C file, including a modified C file by transformation.ml, | |
984 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
985 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
986 | * fancy stuff when a function was not modified at all. Just need to | |
987 | * print the list of token as-is. But now pretty_print_c.ml handles | |
988 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
989 | * optimize a little the pretty printing. | |
990 | * | |
991 | * update: now have PPviastr which goes even faster than PPviatok, so | |
992 | * PPviatok has disappeared. | |
993 | *) | |
994 | ||
995 | type ppmethod = PPnormal | PPviastr | |
996 | ||
997 | ||
998 | ||
999 | ||
1000 | (* The pp_program function will call pretty_print_c.ml with a special | |
1001 | * function to print the leaf components, the tokens. When we want to | |
1002 | * print a token, we need to print also maybe the space and comments that | |
1003 | * were close to it in the original file (and that was omitted during the | |
1004 | * parsing phase), and honor what the cocci-info attached to the token says. | |
1005 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
1006 | * print it and also print some cocci-code attached in a PLUS to it. | |
1007 | * So we will also maybe call unparse_cocci. Because the cocci-code may | |
1008 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
1009 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
1010 | *) | |
1011 | ||
1012 | let pp_program2 xs outfile = | |
1013 | Common.with_open_outfile outfile (fun (pr,chan) -> | |
1014 | let pr s = | |
1015 | if !Flag_parsing_c.debug_unparsing | |
1016 | then begin pr2_no_nl s; flush stderr end | |
1017 | else pr s | |
1018 | (* flush chan; *) | |
1019 | (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *) | |
1020 | in | |
1021 | ||
1022 | xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) -> | |
34e49164 C |
1023 | (* here can still work on ast *) |
1024 | let e = remove_useless_fakeInfo_struct e in | |
1025 | ||
1026 | match ppmethod with | |
002099fc | 1027 | | PPnormal -> |
34e49164 C |
1028 | (* now work on tokens *) |
1029 | ||
1030 | (* phase1: just get all the tokens, all the information *) | |
1031 | assert(toks_e +> List.for_all (fun t -> | |
1032 | TH.is_origin t or TH.is_expanded t | |
1033 | )); | |
1034 | let toks = get_fakeInfo_and_tokens e toks_e in | |
1035 | let toks = displace_fake_nodes toks in | |
1036 | (* assert Origin;ExpandedTok;Faketok *) | |
1037 | let toks = expand_mcode toks in | |
1038 | (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok) | |
1039 | * and no tag information, just NOTHING. *) | |
fc1ad971 C |
1040 | |
1041 | let toks = | |
1042 | if !Flag.sgrep_mode2 | |
1043 | then drop_minus toks (* nothing to do for sgrep *) | |
1044 | else | |
1045 | (* phase2: can now start to filter and adjust *) | |
1046 | let toks = adjust_indentation toks in | |
1047 | let toks = adjust_before_semicolon toks in(*before remove minus*) | |
1048 | let toks = remove_minus_and_between_and_expanded_and_fake toks in | |
1049 | (* assert Origin + Cocci + C and no minus *) | |
1050 | let toks = add_space toks in | |
1051 | let toks = fix_tokens toks in | |
1052 | toks in | |
34e49164 C |
1053 | |
1054 | (* in theory here could reparse and rework the ast! or | |
1055 | * apply some SP. Not before cos julia may have generated | |
1056 | * not parsable file. Need do unparsing_tricks call before being | |
1057 | * ready to reparse. *) | |
1058 | print_all_tokens2 pr toks; | |
1059 | ||
1060 | | PPviastr -> pr str | |
1061 | ) | |
1062 | ) | |
1063 | ||
1064 | let pp_program a b = | |
1065 | Common.profile_code "C unparsing" (fun () -> pp_program2 a b) | |
1066 | ||
485bce71 C |
1067 | |
1068 | let pp_program_default xs outfile = | |
1069 | let xs' = xs +> List.map (fun x -> x, PPnormal) in | |
1070 | pp_program xs' outfile |