Commit | Line | Data |
---|---|---|
485bce71 | 1 | (* Copyright (C) 2006, 2007, 2008 Yoann Padioleau |
34e49164 C |
2 | * |
3 | * This program is free software; you can redistribute it and/or | |
4 | * modify it under the terms of the GNU General Public License (GPL) | |
5 | * version 2 as published by the Free Software Foundation. | |
6 | * | |
7 | * This program is distributed in the hope that it will be useful, | |
8 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
9 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
10 | * file license.txt for more details. | |
485bce71 C |
11 | * |
12 | * | |
13 | * Modifications by Julia Lawall for better newline handling. | |
34e49164 C |
14 | *) |
15 | open Common | |
16 | ||
17 | open Ast_c | |
18 | ||
19 | module TH = Token_helpers | |
20 | ||
21 | ||
22 | ||
23 | ||
24 | (*****************************************************************************) | |
25 | (* Types used during the intermediate phases of the unparsing *) | |
26 | (*****************************************************************************) | |
27 | ||
(* Tokens produced by phase 1: real parser tokens interleaved with the
 * fake tokens the pretty-printer emits (e.g. the fake ',' of initialisers). *)
type token1 =
  | Fake1 of info
  | T1 of Parser_c.token

(* The cocci_tag of the token should always be a NOTHING. The mark of
 * the token can only be OriginTok or ExpandedTok. Why not get rid of
 * token and get something simpler ? because we need to know if the
 * info is a TCommentCpp or TCommentSpace, etc for some of the further
 * analysis so easier to keep with the token.
 *
 * This type contains the whole information. Have all the tokens with this
 * type. *)
type token2 =
  | T2 of Parser_c.token * bool (* minus *) *
          int option (* orig index, abstracting away comments and space *)
  | Fake2
  | Cocci2 of string             (* text coming from the semantic patch *)
  | C2 of string                 (* C text printed by unparse_cocci callbacks *)
  | Indent_cocci2                (* marker: increase indentation of + code *)
  | Unindent_cocci2              (* marker: decrease indentation of + code *)

(* not used yet *)
type token3 =
  | T3 of Parser_c.token
  | Cocci3 of string
  | C3 of string

(* similar to the tech in parsing_hack *)
type token_extended = {
  tok2 : token2;
  str : string;                         (* cached string form of tok2 *)
  idx : int option;  (* to know if 2 tokens were consecutive in orig file *)
  mutable new_tokens_before : token2 list;
  mutable remove : bool;                (* set by find_paren_comma *)
}
65 | ||
66 | ||
67 | (*****************************************************************************) | |
68 | (* Helpers *) | |
69 | (*****************************************************************************) | |
70 | ||
(* Parse info carried by a token1, whichever variant it is. *)
let info_of_token1 = function
  | Fake1 info -> info
  | T1 tok -> TH.info_of_tok tok

(* Concrete text of a token2; pure markers have no printable form. *)
let str_of_token2 = function
  | T2 (t, _, _) -> TH.str_of_tok t
  | Cocci2 s | C2 s -> s
  | Fake2 | Indent_cocci2 | Unindent_cocci2 -> ""

(* Debug rendering of a token2, tagged with its constructor. *)
let print_token2 = function
  | T2 (t, b, _) ->
      let minus_mark = if b then "-" else "" in
      "T2:" ^ minus_mark ^ TH.str_of_tok t
  | Fake2 -> ""
  | Cocci2 s -> "Cocci2:" ^ s
  | C2 s -> "C2:" ^ s
  | Indent_cocci2 -> "Indent"
  | Unindent_cocci2 -> "Unindent"

(* Dump a whole token2 stream on stdout, space-separated. *)
let print_all_tokens2 l =
  List.iter (fun tok -> Printf.printf "%s " (print_token2 tok)) l;
  Printf.printf "\n"

(* Concrete text of a token3. *)
let str_of_token3 = function
  | T3 t -> TH.str_of_tok t
  | Cocci3 s | C3 s -> s
99 | ||
100 | ||
101 | ||
(* Wrap a token2 in the record used by the comma-cleanup pass, caching
 * its string form and its original-file index (None for non-T2 tokens). *)
let mk_token_extended x =
  let orig_index = match x with T2 (_, _, idx) -> idx | _ -> None in
  { tok2 = x;
    str = str_of_token2 x;
    idx = orig_index;
    new_tokens_before = [];
    remove = false;
  }

(* Flatten extended tokens back to a plain token2 list — honoring the
 * [new_tokens_before] insertions and the [remove] flags — and re-extend
 * the result so indices/caches are fresh. *)
let rebuild_tokens_extented toks_ext =
  let acc = ref [] in
  toks_ext +> List.iter (fun tok ->
    List.iter (fun extra -> push2 extra acc) tok.new_tokens_before;
    if not tok.remove then push2 tok.tok2 acc
  );
  List.rev !acc +> List.map mk_token_extended
123 | ||
124 | ||
(* Does this mcode carry some attached + code (or a non-NOTHING context
 * annotation) that would be lost if the token were dropped? *)
let mcode_contain_plus = function
  | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
  | Ast_cocci.CONTEXT _ -> true
  (* patch: when need full coccinelle transformation *)
  | Ast_cocci.MINUS (_,[]) -> false
  (* fix: use a wildcard cons pattern; the original bound x::xs without
   * using them, triggering unused-variable warnings *)
  | Ast_cocci.MINUS (_,_::_) -> true
  | Ast_cocci.PLUS -> raise Impossible

(* Same test, starting from the parse info of a token. *)
let contain_plus info =
  let mck = Ast_c.mcode_of_info info in
  mcode_contain_plus mck
136 | ||
137 | (*****************************************************************************) | |
138 | (* Last fix on the ast *) | |
139 | (*****************************************************************************) | |
140 | ||
141 | (* Because of the ugly trick to handle initialiser, I generate fake ',' | |
142 | * for the last initializer element, but if there is nothing around it, | |
143 | * I don't want in the end to print it. | |
144 | *) | |
145 | ||
(* Drop the fake ',' generated for the last element of an initialiser
 * list when neither the comma nor the token before it carries + code,
 * so that the fake token is never printed. *)
let remove_useless_fakeInfo_struct program =
  let bigf = { Visitor_c.default_visitor_c_s with
    Visitor_c.kini_s = (fun (k,bigf) ini ->
      match k ini with
      | InitList args, ii ->
          (match ii with
          | [_i1;_i2] -> ini   (* no trailing comma at all: nothing to do *)
          | [i1;i2;iicommaopt] ->
              if (not (contain_plus iicommaopt)) && (not (contain_plus i2))
                 && (Ast_c.is_fake iicommaopt)
              (* sometimes the guy put a normal iicommaopt *)
              then InitList args, [i1;i2]
              else InitList args, [i1;i2;iicommaopt]
          | _ -> raise Impossible
          )
      | x -> x
    )
  } in
  Visitor_c.vk_toplevel_s bigf program
166 | ||
167 | ||
168 | (*****************************************************************************) | |
169 | (* Tokens1 generation *) | |
170 | (*****************************************************************************) | |
171 | ||
(* Walk [celem] with the pretty printer and re-associate each printed AST
 * info with the concrete tokens of [toks], re-attaching the comment and
 * space tokens the parser kept aside.  Returns the token1 stream in file
 * order; fails if some input tokens were never reached. *)
let get_fakeInfo_and_tokens celem toks =
  let toks_in = ref toks in   (* tokens not yet consumed *)
  let toks_out = ref [] in    (* output, accumulated in reverse *)

  (* todo? verify good order of position ? *)
  let pr_elem info =
    match Ast_c.pinfo_of_info info with
    | FakeTok _ ->
        Common.push2 (Fake1 info) toks_out
    | OriginTok _ | ExpandedTok _ ->
        (* get the associated comments/space/cppcomment tokens *)
        let (before, x, after) = !toks_in +> Common.split_when (fun tok ->
          info =*= TH.info_of_tok tok)
        in
        assert(info = TH.info_of_tok x);
        (*old: assert(before +> List.for_all (TH.is_comment)); *)
        before +> List.iter (fun x ->
          if not (TH.is_comment x)
          then pr2 ("WIERD: not a comment:" ^ TH.str_of_tok x)
          (* case such as int asm d3("x"); not yet in ast *)
        );
        before +> List.iter (fun x -> Common.push2 (T1 x) toks_out);
        push2 (T1 x) toks_out;
        toks_in := after
    | AbstractLineTok _ ->
        (* can be called on type info when for instance use -type_c *)
        if !Flag_parsing_c.pretty_print_type_info
        then Common.push2 (Fake1 info) toks_out
        else raise Impossible (* at this stage *)
  in

  let pr_space _ = () in (* use the spacing that is there already *)

  Pretty_print_c.pp_program_gen pr_elem pr_space celem;

  (* every input token must have been matched by the printer *)
  if not (null !toks_in)
  then failwith "WIERD: unparsing not finished";

  List.rev !toks_out
211 | ||
(* Fake nodes that have BEFORE code should be moved over any subsequent
 * whitespace and newlines, but not any comments, to get as close to the
 * affected code as possible. Similarly, fake nodes that have AFTER code
 * should be moved backwards. No fake nodes should have both before and
 * after code. *)

let displace_fake_nodes toks =
  let is_fake = function Fake1 _ -> true | _ -> false in
  let is_whitespace = function
    | T1(Parser_c.TCommentSpace _)
    (* patch: cocci *)
    | T1(Parser_c.TCommentNewline _) -> true
    | _ -> false in
  (* Find each fake node in turn and slide it according to its tag. *)
  let rec loop toks =
    let fake_info =
      try Some (Common.split_when is_fake toks)
      with Not_found -> None in
    match fake_info with
      Some(bef,((Fake1 info) as fake),aft) ->
        (match !(info.cocci_tag) with
          (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
            (* move the fake node forwards, over trailing whitespace *)
            let (whitespace,rest) = Common.span is_whitespace aft in
            bef @ whitespace @ fake :: (loop rest)
        | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
            (* move the fake node backwards, over preceding whitespace *)
            let revbef = List.rev bef in
            let (revwhitespace,revprev) = Common.span is_whitespace revbef in
            let whitespace = List.rev revwhitespace in
            let prev = List.rev revprev in
            prev @ fake :: (loop (whitespace @ aft))
        | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_) ->
            bef @ fake :: (loop aft)
        | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
            failwith "fake node should not be before-after"
        | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
        )
    | None -> toks
    | _ -> raise Impossible in
  loop toks
251 | ||
252 | (*****************************************************************************) | |
253 | (* Tokens2 generation *) | |
254 | (*****************************************************************************) | |
255 | ||
(* Turn the token1 stream into a token2 stream: for each token, consult
 * its cocci_tag and emit the token itself (marked minus or not, with its
 * original index) plus any + code attached BEFORE/AFTER/in place, printed
 * through Unparse_cocci callbacks that push Cocci2/C2/indent markers. *)
let expand_mcode toks =
  let toks_out = ref [] in

  (* running index over origin, non-comment tokens; lets later passes
   * detect tokens that were consecutive in the original file *)
  let index = ref 0 in

  let add_elem t minus =
    match t with
    | Fake1 info ->
        let str = Ast_c.str_of_info info in
        if str = ""
        then push2 (Fake2) toks_out
        (* perhaps the fake ',' *)
        else push2 (C2 str) toks_out

    | T1 tok ->
        (* no tag on expandedTok ! *)
        assert(not (TH.is_expanded tok &&
                    !((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot));

        (* strip the cocci annotation: it has been consumed by now *)
        let tok' = tok +> TH.visitor_info_of_tok (fun i ->
          { i with cocci_tag = ref Ast_c.emptyAnnot; }
        ) in

        let optindex =
          if TH.is_origin tok && not (TH.is_real_comment tok)
          then begin
            incr index;
            Some !index
          end
          else None
        in

        push2 (T2 (tok', minus, optindex)) toks_out
  in

  let expand_info t =
    let (mcode,env) = !((info_of_token1 t).cocci_tag) in

    (* callbacks handed to the semantic-patch pretty printer *)
    let pr_cocci s =
      push2 (Cocci2 s) toks_out
    in
    let pr_c info =
      match Ast_c.pinfo_of_info info with
        Ast_c.AbstractLineTok _ -> push2 (C2 (Ast_c.str_of_info info)) toks_out
      | Ast_c.FakeTok (s,_) -> push2 (C2 s) toks_out
      | _ ->
          Printf.printf "line: %s\n" (Common.dump info);
          failwith "not an abstract line" in

    let pr_space _ = push2 (C2 " ") toks_out in

    let indent _ = push2 Indent_cocci2 toks_out in
    let unindent _ = push2 Unindent_cocci2 toks_out in

    let args_pp = (env, pr_cocci, pr_c, pr_space, indent, unindent) in

    (* old: when for yacfe with partial cocci:
     * add_elem t false;
     *)

    (* patch: when need full coccinelle transformation *)
    match mcode with
    | Ast_cocci.MINUS (_,any_xxs) ->
        (* Why adding ? because I want to have all the information, the whole
         * set of tokens, so I can then process and remove the
         * is_between_two_minus for instance *)
        add_elem t true;
        Unparse_cocci.pp_list_list_any args_pp any_xxs Unparse_cocci.InPlace
    | Ast_cocci.CONTEXT (_,any_befaft) ->
        (match any_befaft with
        | Ast_cocci.NOTHING ->
            add_elem t false
        | Ast_cocci.BEFORE xxs ->
            Unparse_cocci.pp_list_list_any args_pp xxs Unparse_cocci.Before;
            add_elem t false
        | Ast_cocci.AFTER xxs ->
            add_elem t false;
            Unparse_cocci.pp_list_list_any args_pp xxs Unparse_cocci.After
        | Ast_cocci.BEFOREAFTER (xxs, yys) ->
            Unparse_cocci.pp_list_list_any args_pp xxs Unparse_cocci.Before;
            add_elem t false;
            Unparse_cocci.pp_list_list_any args_pp yys Unparse_cocci.After
        )
    | Ast_cocci.PLUS -> raise Impossible

  in

  toks +> List.iter expand_info;
  List.rev !toks_out
346 | ||
347 | ||
348 | (*****************************************************************************) | |
349 | (* Tokens2 processing, filtering, adjusting *) | |
350 | (*****************************************************************************) | |
351 | ||
(* Tokens that may be silently swallowed when they sit inside a fully
 * minused region: whitespace, newlines, plain comments and the benign
 * cpp-comment kinds. Directives and "misc" comments must survive. *)
let is_minusable_comment = function
  | T2 (t,_b,_i) ->
      (match t with
      | Parser_c.TCommentSpace _   (* only whitespace *)
      (* patch: coccinelle *)
      | Parser_c.TCommentNewline _ (* newline plus whitespace *)
      | Parser_c.TComment _
      | Parser_c.TCommentCpp (Ast_c.CppAttr, _)
      | Parser_c.TCommentCpp (Ast_c.CppMacro, _)
        -> true

      | Parser_c.TCommentMisc _
      | Parser_c.TCommentCpp (Ast_c.CppDirective, _)
      | Parser_c.TCommentCpp (Ast_c.CppPassingCosWouldGetError, _)
        -> false

      | _ -> false
      )
  | _ -> false

(* Tokens that originate from the semantic patch (+ code or markers). *)
let all_coccis = function
    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 -> true
  | _ -> false

(* As is_minusable_comment, but newlines block the span and + code is
 * allowed to sit in the middle of minused code. *)
let is_minusable_comment_or_plus = function
    (* patch: coccinelle *)
    T2(Parser_c.TCommentNewline _,_b,_i) -> false
  (* fix: `or` is a deprecated alias in OCaml; use || *)
  | x -> is_minusable_comment x || all_coccis x
380 | ||
(* Mark a swallowable comment/space token as minus (so it gets removed
 * with the surrounding minused code), warning when a real comment is
 * erased. Only tokens accepted by is_minusable_comment may be passed. *)
let set_minus_comment = function
  | T2 (t,false,idx) ->
      let str = TH.str_of_tok t in
      (match t with
      | Parser_c.TCommentSpace _
      (* patch: coccinelle *)
      | Parser_c.TCommentNewline _ -> ()

      | Parser_c.TComment _
      | Parser_c.TCommentCpp (Ast_c.CppAttr, _)
      | Parser_c.TCommentCpp (Ast_c.CppMacro, _)
        ->
          (* losing user text: make it visible on stderr *)
          pr2 ("ERASING_COMMENTS: " ^ str)
      | _ -> raise Impossible
      );
      T2 (t, true, idx)
  (* patch: coccinelle *)
  | T2 (Parser_c.TCommentNewline _,true,idx) as x -> x   (* already minus *)
  | _ -> raise Impossible

(* Same, but + tokens pass through untouched. *)
let set_minus_comment_or_plus = function
    Cocci2 _ | C2 _ | Indent_cocci2 | Unindent_cocci2 as x -> x
  | x -> set_minus_comment x
404 | ||
405 | ||
(* Final filtering pass: drop expanded and fake tokens, mark as minus the
 * whitespace/comments trapped between minused tokens, then remove every
 * minus token from the stream. *)
let remove_minus_and_between_and_expanded_and_fake xs =

  (* get rid of expanded and fake tok *)
  let xs = xs +> Common.exclude (function
    | T2 (t,_,_) when TH.is_expanded t -> true
    | Fake2 -> true

    | _ -> false
  )
  in

  (*This drops the space before each completely minused block (no plus code).*)
  let rec adjust_before_minus = function
      [] -> []
    (* patch: coccinelle *)
    | (T2(Parser_c.TCommentNewline c,_b,_i) as x)::((T2(_,true,_)::_) as xs) ->
        (* a newline directly followed by minused code: see how far the
         * minused/commented span extends *)
        let minus_or_comment = function
            T2(_,true,_) -> true
          | T2(Parser_c.TCommentNewline _,_b,_i) -> false
          | x -> is_minusable_comment x in
        let (between_minus,rest) = Common.span minus_or_comment xs in
        (match rest with
          [] -> (set_minus_comment x) :: between_minus
        | T2(Parser_c.TCommentNewline _,_b,_i)::_ ->
            (* whole line minused: the leading newline goes too *)
            (set_minus_comment x) :: between_minus @
            (adjust_before_minus rest)
        | _ -> x :: between_minus @ (adjust_before_minus rest))
    | x::xs -> x::adjust_before_minus xs in

  let xs = adjust_before_minus xs in

  (* this deals with any stuff that is between the minused code, eg
     spaces, comments, attributes, etc. *)
  (* The use of is_minusable_comment_or_plus and set_minus_comment_or_plus
     is because the + code can end up anywhere in the middle of the - code;
     it is not necessarily to the far left *)
  let rec adjust_between_minus xs =
    match xs with
    | [] -> []
    | (T2 (t1,true,idx1))::xs ->

        let (between_comments, rest) =
          Common.span is_minusable_comment_or_plus xs in
        (match rest with
        | [] -> [(T2 (t1, true,idx1))]

        | (T2 (t2, true,idx2))::rest ->
            (* two minus tokens: everything between them is minused too *)
            (T2 (t1, true,idx1))::
            (List.map set_minus_comment_or_plus between_comments @
             adjust_between_minus ((T2 (t2, true, idx2))::rest))
        | x::xs ->
            (T2 (t1, true, idx1))::
            (between_comments @ adjust_between_minus (x::xs))
        )

    | x::xs -> x::adjust_between_minus xs in

  let xs = adjust_between_minus xs in

  (* finally remove every token marked minus *)
  let xs = xs +> Common.exclude (function
    | T2 (t,true,_) -> true
    | _ -> false
  ) in
  xs
470 | ||
471 | ||
(* True when the whole token text matches the identifier regexp. *)
let is_ident_like s = s ==~ Common.regexp_alpha

(* Insert a single space between two adjacent identifier-like tokens so
 * that newly generated code does not glue words together. *)
let rec add_space = function
  | [] -> []
  | [last] -> [last]
  | first :: (second :: _ as rest) ->
      let needs_sep =
        is_ident_like (str_of_token2 first)
        && is_ident_like (str_of_token2 second) in
      if needs_sep
      then first :: C2 " " :: add_space rest
      else first :: add_space rest
484 | ||
485 | ||
486 | ||
487 | (* When insert some new code, because of a + in a SP, we must add this | |
488 | * code at the right place, with the good indentation. So each time we | |
489 | * encounter some spacing info, with some newline, we maintain the | |
490 | * current indentation level used. | |
491 | * | |
492 | * TODO problems: not accurate. ex: TODO | |
493 | * | |
494 | * TODO: if in #define region, should add a \ \n | |
495 | *) | |
(* Indentation carried by a whitespace token: the characters that follow
 * the last '\n' in [space] (the whole string if it has no newline). *)
let new_tabbing2 space =
  let reversed = List.rev (list_of_string space) in
  let rev_indent = Common.take_until (fun c -> c = '\n') reversed in
  let chars = List.rev rev_indent in
  String.concat "" (List.map string_of_char chars)

(* Profiled wrapper around new_tabbing2. *)
let new_tabbing a =
  Common.profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
506 | ||
507 | ||
(* Give the + code a plausible indentation: track the current indentation
 * from the newline tokens of the original file, guess a per-level
 * "tabbing unit", and expand the Indent_cocci2/Unindent_cocci2 markers
 * accordingly. Heuristic by design (see the TODOs above new_tabbing2). *)
let rec adjust_indentation xs =
  let _current_tabbing = ref "" in
  (* (string form, reversed char list) of one indentation step, once known *)
  let tabbing_unit = ref None in

  let string_of_list l = String.concat "" (List.map string_of_char l) in

  (* try to pick a tabbing unit for the plus code *)
  let adjust_tabbing_unit old_tab new_tab =
    if !tabbing_unit = None && String.length new_tab > String.length old_tab
    then
      let old_tab = list_of_string old_tab in
      let new_tab = list_of_string new_tab in
      (* the unit is whatever new_tab adds beyond old_tab *)
      let rec loop = function
          ([],new_tab) ->
            tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
        | (_,[]) -> failwith "not possible"   (* guarded by the length test *)
        | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
      loop (old_tab,new_tab) in

  (* remove one tabbing unit from the end of the current indentation *)
  let remtab tu current_tab =
    let current_tab = List.rev(list_of_string current_tab) in
    let rec loop = function
        ([],new_tab) -> string_of_list (List.rev new_tab)
      | (_,[]) -> "" (*wierd; tabbing unit used up more than the current tab*)
      | (t::ts,n::ns) when t = n -> loop (ts,ns)
      | (_,ns) -> (* mismatch; remove what we can *)
          string_of_list (List.rev ns) in
    loop (tu,current_tab) in

  (* seed tabbing_unit from the first indented line after the first '{' *)
  let rec find_first_tab started = function
      [] -> ()
    | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x = "{" ->
        find_first_tab true xs
    (* patch: coccinelle *)
    | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::_
      when started ->
        let s = str_of_token2 x +> new_tabbing in
        tabbing_unit := Some (s,List.rev (list_of_string s))
    | x::xs -> find_first_tab started xs in
  find_first_tab false xs;

  (* main walk; [started] becomes true after the first '{' is seen *)
  let rec aux started xs =
    match xs with
    | [] -> []
    (* patch: coccinelle *)
    | ((T2 (tok,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _))::
      (Cocci2 "{")::xs when started && str_of_token2 x = ")" ->
        (* to be done for if, etc, but not for a function header *)
        x::(Cocci2 " {")::(aux started xs)
    | ((T2 (Parser_c.TCommentNewline s, _, _)) as x)::xs ->
        let old_tabbing = !_current_tabbing in
        str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := s);
        (* only trust the indentation after the first { *)
        (if started then adjust_tabbing_unit old_tabbing !_current_tabbing);
        let coccis_rest = Common.span all_coccis xs in
        (match coccis_rest with
          (_::_,((T2 (tok,_,_)) as y)::_) when str_of_token2 y = "}" ->
            (* the case where cocci code has been added before a close } *)
            x::aux started (Indent_cocci2::xs)
        | _ -> x::aux started xs)
    | Indent_cocci2::xs ->
        (match !tabbing_unit with
          None -> aux started xs
        | Some (tu,_) ->
            _current_tabbing := (!_current_tabbing)^tu;
            Cocci2 (tu)::aux started xs)
    | Unindent_cocci2::xs ->
        (match !tabbing_unit with
          None -> aux started xs
        | Some (_,tu) ->
            _current_tabbing := remtab tu (!_current_tabbing);
            aux started xs)
    (* border between existing code and cocci code *)
    | ((T2 (tok,_,_)) as x)::((Cocci2 "\n") as y)::xs
      when str_of_token2 x = "{" ->
        x::aux true (y::Indent_cocci2::xs)
    | ((Cocci2 _) as x)::((T2 (tok,_,_)) as y)::xs
      when str_of_token2 y = "}" ->
        x::aux started (y::Unindent_cocci2::xs)
    (* starting the body of the function *)
    | ((T2 (tok,_,_)) as x)::xs when str_of_token2 x = "{" -> x::aux true xs
    | (Cocci2 "{")::xs -> (Cocci2 "{")::aux true xs
    | ((Cocci2 "\n") as x)::xs ->
        (* dont inline in expr because of weird eval order of ocaml *)
        let s = !_current_tabbing in
        x::Cocci2 (s)::aux started xs
    | x::xs -> x::aux started xs in
  aux false xs
596 | ||
597 | ||
(* Flag for removal the commas left dangling by deleted arguments: a ','
 * right after '(', after another ',', or right before ')'.  Pairs whose
 * [idx] values are consecutive were already adjacent in the original
 * file, so those are deliberately left untouched. *)
let rec find_paren_comma = function
  | [] -> ()

  (* do nothing if was like this in original file *)
  | ({ str = "("; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
    ::xs when p2 = p1 + 1 ->
      find_paren_comma (x2::xs)

  | ({ str = ","; idx = Some p1 } as _x1)::({ str = ","; idx = Some p2} as x2)
    ::xs when p2 = p1 + 1 ->
      find_paren_comma (x2::xs)

  | ({ str = ","; idx = Some p1 } as _x1)::({ str = ")"; idx = Some p2} as x2)
    ::xs when p2 = p1 + 1 ->
      find_paren_comma (x2::xs)

  (* otherwise yes can adjust *)
  | ({ str = "(" } as _x1)::({ str = ","} as x2)::xs ->
      (* "(," -> drop the comma itself *)
      x2.remove <- true;
      find_paren_comma (x2::xs)
  | ({ str = "," } as x1)::({ str = ","} as x2)::xs ->
      (* ",," -> drop the first comma, keep scanning from the second *)
      x1.remove <- true;
      find_paren_comma (x2::xs)

  | ({ str = "," } as x1)::({ str = ")"} as x2)::xs ->
      (* ",)" -> drop the comma *)
      x1.remove <- true;
      find_paren_comma (x2::xs)

  | x::xs ->
      find_paren_comma xs
628 | ||
629 | ||
(* Run the dangling-comma cleanup over a token2 stream.  Real comments are
 * hidden from the scan (but ifdef-style cpp comments are kept) so that a
 * comment between '(' and ',' does not mask the pattern. *)
let fix_tokens toks =
  let extended = List.map mk_token_extended toks in

  let scan_view =
    extended +> Common.exclude (function
      | { tok2 = T2 (t, _, _) } -> TH.is_real_comment t (* I want the ifdef *)
      | _ -> false) in
  find_paren_comma scan_view;

  (* mutations on scan_view are visible here: same records *)
  rebuild_tokens_extented extended +> List.map (fun ext -> ext.tok2)
641 | ||
642 | ||
643 | ||
644 | (*****************************************************************************) | |
645 | (* Final unparsing (and debugging support) *) | |
646 | (*****************************************************************************) | |
647 | ||
648 | (* for debugging *) | |
(* for debugging: coarse provenance of a token2 *)
type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin

let kind_of_token2 = function
  | Fake2 -> KFake
  | Cocci2 _ -> KCocci
  | C2 _ -> KC
  | T2 (t, _, _) ->
      (match TH.pinfo_of_tok t with
       | ExpandedTok _ -> KExpanded
       | OriginTok _ -> KOrigin
       | FakeTok _ -> raise Impossible (* now a Fake2 *)
       | AbstractLineTok _ -> raise Impossible (* now a KC *))
  | Indent_cocci2 | Unindent_cocci2 -> raise Impossible

(* debug delimiters wrapped around runs of same-kind tokens *)
let end_mark = "!"

let start_mark = function
  | KFake -> "!F!"
  | KCocci -> "!S!"
  | KC -> "!A!"
  | KExpanded -> "!E!"
  | KOrigin -> ""
672 | ||
(* Emit the token stream through [pr].  In debug mode, runs of tokens of
 * the same kind are bracketed with start/end marks so the provenance of
 * each chunk is visible; otherwise tokens are printed verbatim.
 * NOTE(review): this shadows the earlier stdout-based print_all_tokens2,
 * which therefore becomes dead code below this point — confirm intended.
 * fix: removed the stray ';' between the then-branch and 'else', which
 * is a syntax error as written. *)
let print_all_tokens2 pr xs =
  if !Flag_parsing_c.debug_unparsing
  then
    let current_kind = ref KOrigin in
    xs +> List.iter (fun t ->
      let newkind = kind_of_token2 t in
      if newkind = !current_kind
      then pr (str_of_token2 t)
      else begin
        pr (end_mark);
        pr (start_mark newkind);
        pr (str_of_token2 t);
        current_kind := newkind
      end
    )
  else
    xs +> List.iter (fun x -> pr (str_of_token2 x))
690 | ||
691 | ||
692 | ||
693 | ||
694 | (*****************************************************************************) | |
695 | (* Entry points *) | |
696 | (*****************************************************************************) | |
697 | ||
698 | (* old: PPviatok was made in the beginning to allow to pretty print a | |
699 | * complete C file, including a modified C file by transformation.ml, | |
700 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and | |
701 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some | |
702 | * fancy stuff when a function was not modified at all. Just need to | |
703 | * print the list of token as-is. But now pretty_print_c.ml handles | |
704 | * almost everything so maybe less useful. Maybe PPviatok allows to | |
705 | * optimize a little the pretty printing. | |
706 | * | |
707 | * update: now have PPviastr which goes even faster than PPviatok, so | |
708 | * PPviatok has disappeared. | |
709 | *) | |
710 | ||
(* How to unparse one toplevel element: PPnormal runs the full token
 * machinery above; PPviastr just echoes the original source string
 * (fast path for elements that were not modified at all). *)
type ppmethod = PPnormal | PPviastr
712 | ||
713 | ||
714 | ||
715 | ||
716 | (* The pp_program function will call pretty_print_c.ml with a special | |
717 | * function to print the leaf components, the tokens. When we want to | |
718 | * print a token, we need to print also maybe the space and comments that | |
719 | * were close to it in the original file (and that was omitted during the | |
720 | * parsing phase), and honor what the cocci-info attached to the token says. | |
721 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will | |
722 | * print it and also print some cocci-code attached in a PLUS to it. | |
723 | * So we will also maybe call unparse_cocci. Because the cocci-code may | |
724 | * contain metavariables, unparse_cocci will in fact sometimes call back | |
725 | * pretty_print_c (which will this time don't call back again unparse_cocci) | |
726 | *) | |
727 | ||
(* Unparse each (elem, (original text, tokens)) pair of [xs] into
 * [outfile], dispatching on its ppmethod.  PPnormal runs the two-phase
 * token pipeline defined above; PPviastr just copies the original text.
 * fix: `or` is a deprecated alias in OCaml, replaced by ||. *)
let pp_program2 xs outfile =
  Common.with_open_outfile outfile (fun (pr,chan) ->
    let pr s =
      if !Flag_parsing_c.debug_unparsing
      then begin pr2_no_nl s; flush stderr end
      else pr s
      (* flush chan; *)
      (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
    in

    xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->

      (* here can still work on ast *)
      let e = remove_useless_fakeInfo_struct e in

      match ppmethod with
      | PPnormal ->
          (* now work on tokens *)

          (* phase1: just get all the tokens, all the information *)
          assert(toks_e +> List.for_all (fun t ->
            TH.is_origin t || TH.is_expanded t
          ));
          let toks = get_fakeInfo_and_tokens e toks_e in
          let toks = displace_fake_nodes toks in
          (* assert Origin;ExpandedTok;Faketok *)
          let toks = expand_mcode toks in
          (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
           * and no tag information, just NOTHING. *)

          (* phase2: can now start to filter and adjust *)
          let toks = adjust_indentation toks in
          let toks = remove_minus_and_between_and_expanded_and_fake toks in
          (* assert Origin + Cocci + C and no minus *)
          let toks = add_space toks in
          let toks = fix_tokens toks in

          (* in theory here could reparse and rework the ast! or
           * apply some SP. Not before cos julia may have generated
           * not parsable file. Need do unparsing_tricks call before being
           * ready to reparse. *)
          print_all_tokens2 pr toks

      | PPviastr -> pr str
    )
  )

(* Profiled public entry point. *)
let pp_program a b =
  Common.profile_code "C unparsing" (fun () -> pp_program2 a b)
777 | ||
485bce71 C |
778 | |
(* Convenience wrapper: unparse every element with the normal method. *)
let pp_program_default xs outfile =
  pp_program (xs +> List.map (fun elem -> (elem, PPnormal))) outfile