| 1 | (* Yoann Padioleau, Julia Lawall |
| 2 | * |
| 3 | * Copyright (C) 2012, INRIA. |
| 4 | * Copyright (C) 2010, 2011, University of Copenhagen DIKU and INRIA. |
| 5 | * Copyright (C) 2006, 2007, 2008, 2009 Ecole des Mines de Nantes and DIKU |
| 6 | * |
| 7 | * This program is free software; you can redistribute it and/or |
| 8 | * modify it under the terms of the GNU General Public License (GPL) |
| 9 | * version 2 as published by the Free Software Foundation. |
| 10 | * |
| 11 | * This program is distributed in the hope that it will be useful, |
| 12 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 13 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 14 | * file license.txt for more details. |
| 15 | * |
| 16 | * |
| 17 | * Modifications by Julia Lawall for better newline handling. |
| 18 | *) |
| 19 | open Common |
| 20 | |
| 21 | module TH = Token_helpers |
| 22 | |
| 23 | (* should keep comments and directives in between adjacent deleted terms, |
| 24 | but not comments and directives within deleted terms. should use the |
| 25 | labels found in the control-flow graph *) |
| 26 | |
| 27 | |
| 28 | |
| 29 | (*****************************************************************************) |
| 30 | (* Wrappers *) |
| 31 | (*****************************************************************************) |
(* Logging wrappers (from Common) gated on the unparsing verbosity flag. *)
let pr2, pr2_once = mk_pr2_wrappers Flag_parsing_c.verbose_unparsing
| 33 | |
| 34 | (*****************************************************************************) |
| 35 | (* Types used during the intermediate phases of the unparsing *) |
| 36 | (*****************************************************************************) |
| 37 | |
(* First token representation: either a real token from the C parser, or a
 * fake token generated from an Ast_c fake info node. *)
type token1 =
  | Fake1 of Ast_c.info
  | T1 of Parser_c.token
| 41 | |
| 42 | (* The cocci_tag of the token should always be a NOTHING. The mark of |
| 43 | * the token can only be OriginTok or ExpandedTok. Why not get rid of |
| 44 | * token and get something simpler ? because we need to know if the |
| 45 | * info is a TCommentCpp or TCommentSpace, etc for some of the further |
| 46 | * analysis so easier to keep with the token. |
| 47 | * |
| 48 | * This type contains the whole information. Have all the tokens with this |
| 49 | * type. |
| 50 | *) |
(* Removal status of a token: [Min] marks a token removed by the semantic
 * patch (with provenance info), [Ctx] marks an untouched context token. *)
type min =
  | Min of (int list (* match numbers from witness trees *) *
              Ast_cocci.adjacency (* adjacency information *))
  | Ctx (* context token, kept as-is *)
| 55 | |
(* Second token representation, after merging in the cocci annotations. *)
type token2 =
  | T2 of Parser_c.token * min
        * int option (* orig index, abstracting away comments and space *)
        * Unparse_cocci.nlhint option
  | Fake2 of min (* placeholder coming from a Fake1 token *)
  | Cocci2 of string * int (* line *) * int (* lcol *) * int (* rcol *)
        * Unparse_cocci.nlhint option (* text added by the semantic patch *)
  | C2 of string (* raw C text to emit verbatim *)
  | Comma of string (* fake comma kept from initialiser handling *)
  | Indent_cocci2
  | Unindent_cocci2 of bool (* true for permanent, false for temporary *)
  | EatSpace2 (* marker: suppress the following space token *)
| 68 | |
| 69 | (* not used yet *) |
(* Third, simplified token representation (not used yet in this file). *)
type token3 =
  | T3 of Parser_c.token
  | Cocci3 of string
  | C3 of string
| 74 | |
| 75 | |
| 76 | (* similar to the tech in parsing_hack *) |
(* similar to the tech in parsing_hack *)
(* A token2 enriched with bookkeeping used by the token-adjustment passes. *)
type token_extended =
  { tok2 : token2;
    str : string; (* cached string form of tok2 *)
    idx : int option; (* to know if 2 tokens were consecutive in orig file *)
    mutable new_tokens_before : token2 list; (* tokens to insert before this one *)
    mutable remove : bool; (* mark for deletion during rebuild *)
  }
| 84 | |
| 85 | |
| 86 | (*****************************************************************************) |
| 87 | (* Helpers *) |
| 88 | (*****************************************************************************) |
| 89 | |
(* Extract the Ast_c.info carried by a token1, whether real or fake. *)
let info_of_token1 = function
  | Fake1 info -> info
  | T1 tok -> TH.info_of_tok tok
| 94 | |
(* Debug rendering of a token1: the token's own text, or "fake". *)
let print_token1 = function
  | T1 tok -> TH.str_of_tok tok
  (* fix: the payload was bound to an unused name, triggering an
     unused-variable warning; it is now ignored explicitly *)
  | Fake1 _ -> "fake"
| 98 | |
(* String contents of a token2; markers with no text map to "". *)
let str_of_token2 tok =
  match tok with
  | T2 (t,_,_,_) -> TH.str_of_tok t
  | Cocci2 (s,_,_,_,_) -> s
  | C2 s -> s
  | Comma s -> s
  | Fake2 _ | Indent_cocci2 | Unindent_cocci2 _ | EatSpace2 -> ""
| 108 | |
(* Debug rendering of a token2, including its minus/context status.
 * fix: the rendering of the [min] status was duplicated (almost verbatim)
 * in the [T2] and [Fake2] arms; it is now factored into one local helper,
 * parameterized on the prefix ("-." for T2, "-" for Fake2) so the output
 * stays byte-identical. *)
let print_token2 tok =
  (* Render a [min] value: adjacency number (-1 for ALLMINUS) and the
     witness-tree match numbers; context tokens render as "". *)
  let min_str prefix = function
    | Min (index,adj) ->
        Printf.sprintf "%s%d[%s]" prefix
          (match adj with Ast_cocci.ADJ n -> n | _ -> -1)
          (String.concat " " (List.map string_of_int index))
    | Ctx -> "" in
  match tok with
  | T2 (t,b,_,_) ->
      (* short tag for the whitespace/comment token kinds *)
      let t_str =
        match t with
        | Parser_c.TCommentSpace _ -> " sp "
        | Parser_c.TCommentNewline _ -> " nl "
        | Parser_c.TCommentCpp _ -> " cp "
        | Parser_c.TCommentMisc _ -> " misc "
        | Parser_c.TComment _ -> " comment "
        | _ -> "" in
      "T2:" ^ min_str "-." b ^ t_str ^ TH.str_of_tok t
  | Fake2 b -> min_str "-" b ^ "fake"
  | Cocci2 (s,_,lc,rc,_) -> Printf.sprintf "Cocci2:%d:%d%s" lc rc s
  | C2 s -> "C2:" ^ s
  | Comma s -> "Comma:" ^ s
  | Indent_cocci2 -> "Indent"
  | Unindent_cocci2 _ -> "Unindent"
  | EatSpace2 -> "EatSpace"
| 142 | |
| 143 | (* |
| 144 | let simple_print_all_tokens pr l = |
| 145 | List.iter (function x -> Printf.printf "|%s| " (pr x)) l; |
| 146 | Printf.printf "\n" |
| 147 | *) |
| 148 | |
(* String contents of a token3. *)
let str_of_token3 tok =
  match tok with
  | T3 t -> TH.str_of_tok t
  | Cocci3 s -> s
  | C3 s -> s
| 152 | |
| 153 | |
| 154 | |
(* Wrap a token2 into a fresh token_extended with default bookkeeping.
   Only real T2 tokens carry an original-file index. *)
let mk_token_extended x =
  let origidx =
    match x with
    | T2 (_,_,i,_) -> i
    | _ -> None in
  { tok2 = x;
    str = str_of_token2 x;
    idx = origidx;
    new_tokens_before = [];
    remove = false }
| 166 | |
(* Flatten a list of extended tokens back into fresh token_extended values:
   for each token, its pending insertions come first, then the token itself
   unless it is marked for removal. *)
let rebuild_tokens_extented toks_ext =
  let expand tok =
    if tok.remove
    then tok.new_tokens_before
    else tok.new_tokens_before @ [tok.tok2] in
  let tokens = List.concat (List.map expand toks_ext) in
  List.map mk_token_extended tokens
| 175 | |
| 176 | |
(* Does this mcode carry some added (+) code next to it?
   A bare PLUS cannot occur here (patch: full coccinelle transformation). *)
let mcode_contain_plus mck =
  match mck with
  | Ast_cocci.CONTEXT (_,Ast_cocci.NOTHING) -> false
  | Ast_cocci.CONTEXT _ -> true
  | Ast_cocci.MINUS (_,_,_,Ast_cocci.NOREPLACEMENT) -> false
  | Ast_cocci.MINUS (_,_,_,Ast_cocci.REPLACEMENT _) ->
      true (* the replacement is not empty *)
  | Ast_cocci.PLUS _ -> raise (Impossible 132)
| 184 | |
(* Same test, starting from an Ast_c.info rather than an mcode. *)
let contain_plus info =
  mcode_contain_plus (Ast_c.mcode_of_info info)
| 188 | |
| 189 | (*****************************************************************************) |
| 190 | (* Last fix on the ast *) |
| 191 | (*****************************************************************************) |
| 192 | |
| 193 | (* Because of the ugly trick to handle initialiser, I generate fake ',' |
| 194 | * for the last initializer element, but if there is nothing around it, |
| 195 | * I don't want in the end to print it. |
| 196 | *) |
| 197 | |
(* Drop the fake trailing comma of an initializer list when neither it nor
 * the preceding element carries plus code, so it is never printed. *)
let remove_useless_fakeInfo_struct program =
  let bigf = { Visitor_c.default_visitor_c_s with
    Visitor_c.kini_s = (fun (k,bigf) ini ->
      (* visit children first, then inspect the rebuilt initializer *)
      match k ini with
      | Ast_c.InitList args, ii ->
          (match ii with
          | [_;_] -> ini (* just the two brackets: nothing to remove *)
          | i1 :: i2 :: iicommaopt :: tl when
              (not (contain_plus iicommaopt))
              && (not (contain_plus i2))
              && (Ast_c.is_fake iicommaopt) ->
              (* sometimes the guy put a normal iicommaopt *)
              (* drop the fake comma token from the info list *)
              Ast_c.InitList args, (i1 :: i2 :: tl)
          | ii -> Ast_c.InitList args, ii
          )
      | x -> x)
  } in
  Visitor_c.vk_toplevel_s bigf program
| 216 | |
| 217 | |
| 218 | (*****************************************************************************) |
| 219 | (* Tokens1 generation *) |
| 220 | (*****************************************************************************) |
| 221 | |
(* Walk the pretty-printer over [celem], pairing each printed info with the
 * corresponding tokens of [toks] (including the comment/space tokens that
 * precede it), and produce the resulting token1 stream in order.
 * Fails if some of [toks] were never consumed. *)
let get_fakeInfo_and_tokens celem toks =

  (* remaining input tokens / accumulated output (reversed) *)
  let toks_in = ref toks in
  let toks_out = ref [] in

  (* todo? verify good order of position ? *)
  let pr_elem info =
    match Ast_c.pinfo_of_info info with
    | Ast_c.FakeTok _ ->
        push2 (Fake1 info) toks_out
    | Ast_c.OriginTok _ | Ast_c.ExpandedTok _ ->

        (* get the associated comments/space/cppcomment tokens *)
        let (before, x, after) =
          !toks_in +> split_when (fun tok ->
            info =*= TH.info_of_tok tok)
        in
        assert(info =*= TH.info_of_tok x);
        (*old: assert(before +> List.for_all (TH.is_comment)); *)
        before +> List.iter (fun x ->
          if not (TH.is_comment x)
          then pr2 ("WEIRD: not a comment:" ^ TH.str_of_tok x)
          (* case such as int asm d3("x"); not yet in ast *)
        );
        (* emit the leading comments/spaces, then the token itself *)
        before +> List.iter (fun x -> push2 (T1 x) toks_out);

        push2 (T1 x) toks_out;
        toks_in := after;
    | Ast_c.AbstractLineTok _ ->
        (* can be called on type info when for instance use -type_c *)
        if !Flag_parsing_c.pretty_print_type_info
        then push2 (Fake1 info) toks_out
        else raise (Impossible 134) (* at this stage *)
  in

  let pr_space _ = () in (* use the spacing that is there already *)

  Pretty_print_c.pp_program_gen pr_elem pr_space celem;

  if not (null !toks_in)
  then failwith "WEIRD: unparsing not finished";

  List.rev !toks_out
| 265 | |
| 266 | (* Fake nodes that have BEFORE code or are - should be moved over any subsequent |
| 267 | whitespace and newlines, but not any comments, to get as close to the affected |
| 268 | code as possible. Similarly, fake nodes that have AFTER code should be moved |
| 269 | backwards. No fake nodes should have both before and after code. *) |
| 270 | |
(* Move each fake node past adjacent whitespace so it sits next to the code
 * it affects: forwards for BEFORE/minus-with-replacement, backwards for
 * AFTER. Comments are never crossed (is_whitespace excludes them). *)
let displace_fake_nodes toks =
  let is_fake = function Fake1 _ -> true | _ -> false in
  let is_whitespace = function
    | T1(Parser_c.TCommentSpace _)
    (* patch: cocci *)
    | T1(Parser_c.TCommentNewline _) -> true
    | _ -> false in
  let rec loop toks =
    (* find the next fake node, if any; split_when raises Not_found *)
    let fake_info =
      try Some (split_when is_fake toks)
      with Not_found -> None in
    match fake_info with
    | Some(bef,((Fake1 info) as fake),aft) ->
        (match !(info.Ast_c.cocci_tag) with
        | Some x ->
            (match x with
            | (Ast_cocci.MINUS(_,_,_,Ast_cocci.REPLACEMENT _),_)
            (* for , replacement is more likely to be like after, but not clear...
               but treating it as after breaks a lot of tests. *)

            | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFORE _),_) ->
                (* move the fake node forwards *)
                let (whitespace,rest) = span is_whitespace aft in
                bef @ whitespace @ fake :: (loop rest)

            | (Ast_cocci.CONTEXT(_,Ast_cocci.AFTER _),_) ->
                (* move the fake node backwards, over trailing whitespace *)
                let revbef = List.rev bef in
                let (revwhitespace,revprev) = span is_whitespace revbef in
                let whitespace = List.rev revwhitespace in
                let prev = List.rev revprev in
                prev @ fake :: (loop (whitespace @ aft))
            | (Ast_cocci.CONTEXT(_,Ast_cocci.BEFOREAFTER _),_) ->
                failwith "fake node should not be before-after"
            | (Ast_cocci.CONTEXT(_,Ast_cocci.NOTHING),_)
            | _ -> bef @ fake :: (loop aft) (* old: was removed when have simpler yacfe *)
            )
        | None ->
            (* untagged fake node: leave it where it is *)
            bef @ fake :: (loop aft)
        )
    | None -> toks
    | _ -> raise (Impossible 135) in
  loop toks
| 314 | |
| 315 | (*****************************************************************************) |
| 316 | (* Tokens2 generation *) |
| 317 | (*****************************************************************************) |
| 318 | |
(* Convert a cpp-comment token attached to an info into a C2 token; other
 * cpp comments get surrounding newlines. Anything else is unexpected. *)
let comment2t2 = function
  | (Token_c.TCommentCpp
       (* not sure if the following list is exhaustive or complete *)
       (Token_c.CppAttr|Token_c.CppMacro|Token_c.CppPassingCosWouldGetError),
     (info : Token_c.info)) ->
      C2(info.Common.str)
  | (Token_c.TCommentCpp x,(info : Token_c.info)) ->
      (* other cpp comment kinds: force them onto their own line *)
      C2("\n"^info.Common.str^"\n")
  | x -> failwith (Printf.sprintf "unexpected comment %s" (Dumper.dump x))
| 328 | |
(* Turn the token1 stream into a token2 stream by expanding each token's
 * cocci annotation: minus tokens are kept (marked Min) so later passes can
 * reason about what was removed, and the cocci replacement/before/after
 * code is unparsed into Cocci2/C2 tokens at the right position. *)
let expand_mcode toks =
  let toks_out = ref [] in

  (* running original-file index for real, non-comment origin tokens *)
  let index = ref 0 in

  let add_elem t minus =
    match t with
    | Fake1 info ->
        let str = Ast_c.str_of_info info in
        let isminus = match minus with Min _ -> true | Ctx -> false in
        (* don't add fake string if the thing should be removed *)
        if str =$= "" or isminus
        then push2 (Fake2 minus) toks_out
        (* fx the fake "," at the end of a structure or enum.
           no idea what other fake info there can be... *)
        else push2 (Comma str) toks_out

    | T1 tok ->
        (*let (a,b) = !((TH.info_of_tok tok).cocci_tag) in*)
        (* no tag on expandedTok ! *)
        let modified = function
          | None -> false
          | Some (Ast_cocci.CONTEXT(pos,Ast_cocci.NOTHING),l) -> false
          | _ -> true in
        (if TH.is_expanded tok &&
            modified !((TH.info_of_tok tok).Ast_c.cocci_tag)
            (*!((TH.info_of_tok tok).cocci_tag) <> Ast_c.emptyAnnot*)
         then
           failwith
             (Printf.sprintf
                "expanded token %s on line %d is either modified or stored in a metavariable"
                (TH.str_of_tok tok) (TH.line_of_tok tok)));

        (* strip the cocci tag so the token carries no stale annotation *)
        let tok' = tok +> TH.visitor_info_of_tok (fun i ->
          { i with Ast_c.cocci_tag = ref Ast_c.emptyAnnot; }
        ) in

        let optindex =
          if TH.is_origin tok && not (TH.is_real_comment tok)
          then
            begin
              incr index;
              Some !index
            end
          else None
        in

        push2 (T2 (tok', minus, optindex, None)) toks_out
  in

  let expand_info t =
    let (mcode,env) =
      Ast_c.mcode_and_env_of_cocciref ((info_of_token1 t).Ast_c.cocci_tag) in

    (* printers handed to the cocci unparser; they all append to toks_out *)
    let pr_cocci s ln col rcol hint =
      push2 (Cocci2 (s,ln,col,rcol,hint)) toks_out in
    let pr_c info =
      (match Ast_c.pinfo_of_info info with
      | Ast_c.AbstractLineTok _ ->
          push2 (C2 (Ast_c.str_of_info info)) toks_out
      | Ast_c.FakeTok (s,_) ->
          push2 (C2 s) toks_out
      | _ ->
          Printf.fprintf stderr "line: %s\n" (Dumper.dump info);
          failwith "not an abstract line"
      );
      (* also replay the comments attached after this info *)
      (!(info.Ast_c.comments_tag)).Ast_c.mafter +>
      List.iter (fun x -> push2 (comment2t2 x) toks_out) in

    let pr_barrier ln col = (* marks a position, used around C code *)
      push2 (Cocci2 ("",ln,col,col,None)) toks_out in
    let pr_nobarrier ln col = () in (* not needed for linux spacing *)

    let pr_cspace _ = push2 (C2 " ") toks_out in

    let pr_space _ = () (* rely on add_space in cocci code *) in
    let pr_arity _ = () (* not interested *) in

    let indent _ = push2 Indent_cocci2 toks_out in
    let unindent x = push2 (Unindent_cocci2 x) toks_out in
    let eat_space _ = push2 EatSpace2 toks_out in

    let args_pp =
      (env, pr_cocci, pr_c, pr_cspace,
       (match !Flag_parsing_c.spacing with
       | Flag_parsing_c.SMPL -> pr_space | _ -> pr_cspace),
       pr_arity,
       (match !Flag_parsing_c.spacing with
       | Flag_parsing_c.SMPL -> pr_barrier | _ -> pr_nobarrier),
       indent, unindent, eat_space) in

    (* old: when for yacfe with partial cocci:
     * add_elem t false;
     *)

    (* patch: when need full coccinelle transformation *)
    let unparser = Unparse_cocci.pp_list_list_any args_pp false in
    match mcode with
    | Ast_cocci.MINUS (_,inst,adj,any_xxs) ->
        (* Why adding ? because I want to have all the information, the whole
         * set of tokens, so I can then process and remove the
         * is_between_two_minus for instance *)
        add_elem t (Min (inst,adj));
        (match any_xxs with
        | Ast_cocci.NOREPLACEMENT -> ()
        | Ast_cocci.REPLACEMENT(any_xxs,_) ->
            unparser any_xxs Unparse_cocci.InPlace
        )
    | Ast_cocci.CONTEXT (_,any_befaft) ->
        (match any_befaft with
        | Ast_cocci.NOTHING ->
            add_elem t Ctx
        | Ast_cocci.BEFORE (xxs,_) ->
            unparser xxs Unparse_cocci.Before;
            add_elem t Ctx
        | Ast_cocci.AFTER (xxs,_) ->
            add_elem t Ctx;
            unparser xxs Unparse_cocci.After;
        | Ast_cocci.BEFOREAFTER (xxs, yys, _) ->
            unparser xxs Unparse_cocci.Before;
            add_elem t Ctx;
            unparser yys Unparse_cocci.After;
        )
    | Ast_cocci.PLUS _ -> raise (Impossible 136)
  in

  toks +> List.iter expand_info;
  List.rev !toks_out
| 457 | |
| 458 | |
| 459 | (*****************************************************************************) |
| 460 | (* Tokens2 processing, filtering, adjusting *) |
| 461 | (*****************************************************************************) |
| 462 | |
(* True for a whitespace-only token (no newline). *)
let is_space t =
  match t with
  | T2 (Parser_c.TCommentSpace _, _, _, _) -> true
  | _ -> false
| 466 | |
(* True for a newline token. *)
let is_newline t =
  match t with
  | T2 (Parser_c.TCommentNewline _, _, _, _) -> true
  | _ -> false
| 470 | |
(* Whitespace is either a space token or a newline token. *)
let is_whitespace x = is_space x || is_newline x
| 473 | |
(* Can this token be swept up into a deleted region? Whitespace, newlines
 * and most comment/cpp tokens qualify; real comments are protected when
 * the user asked to keep comments. *)
let is_minusable_comment = function
  | (T2 (t,_b,_i,_h)) ->
      (match t with
      | Parser_c.TCommentSpace _ (* only whitespace *)
      (* patch: coccinelle *)
      | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
      | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
      | Parser_c.TComment _
      | Parser_c.TCommentCpp (Token_c.CppAttr, _)
      | Parser_c.TCommentCpp (Token_c.CppMacro, _)
      | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
      | Parser_c.TCommentCpp (Token_c.CppDirective, _) -> (* result was false *)
          true
      (*
      | Parser_c.TCommentMisc _
      | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
          false
      *)
      | _ -> false
      )
  | _ -> false
| 495 | |
(* Like is_minusable_comment, but cpp tokens (directives, macros, attrs)
 * are NOT minusable; only whitespace, newlines and plain comments are. *)
let is_minusable_comment_nocpp = function
  | (T2 (t,_b,_i,_h)) ->
      (match t with
      | Parser_c.TCommentSpace _ (* only whitespace *)
      (* patch: coccinelle *)
      | Parser_c.TCommentNewline _ (* newline plus whitespace *) -> true
      | Parser_c.TComment _ when !Flag_parsing_c.keep_comments -> false
      | Parser_c.TComment _ -> true
      (*
      | Parser_c.TCommentCpp (Token_c.CppAttr, _)
      | Parser_c.TCommentCpp (Token_c.CppMacro, _)
      | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
      | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
          false

      | Parser_c.TCommentMisc _
      | Parser_c.TCommentCpp (Token_c.CppPassingCosWouldGetError, _) ->
          false
      *)
      | _ -> false
      )
  | _ -> false
| 518 | |
(* True for any token that originates from the semantic patch. *)
let all_coccis t =
  match t with
  | Cocci2 _ | C2 _ | Comma _
  | Indent_cocci2 | Unindent_cocci2 _ | EatSpace2 -> true
  | _ -> false
| 523 | |
(* previously gave up if the first character was a newline, but not clear why *)
(* Minusable comment/whitespace, or any cocci-originated token. *)
let is_minusable_comment_or_plus x =
  all_coccis x || is_minusable_comment x
| 527 | |
(* Mark a context comment/whitespace token as minus (removed) with the
 * given adjacency. Erased comments and cpp tokens are logged; anything
 * already minus or fake passes through unchanged. *)
let set_minus_comment adj = function
  | T2 (t,Ctx,idx,hint) ->
      let str = TH.str_of_tok t in
      (match t with
      | Parser_c.TCommentSpace _
      (* patch: coccinelle *)
      | Parser_c.TCommentNewline _ -> () (* silent for plain whitespace *)

      | Parser_c.TComment _
      | Parser_c.TCommentCpp (Token_c.CppAttr, _)
      | Parser_c.TCommentCpp (Token_c.CppMacro, _)
      | Parser_c.TCommentCpp (Token_c.CppIfDirective _, _)
      | Parser_c.TCommentCpp (Token_c.CppDirective, _) ->
          (* warn the user: a visible comment/directive is being dropped *)
          pr2 (Printf.sprintf "%d: ERASING_COMMENTS: %s"
                 (TH.line_of_tok t) str)
      | _ -> raise (Impossible 137)
      );
      T2 (t, Min adj, idx, hint)
  (* patch: coccinelle *)
  | T2 (t, Min adj, idx, hint) as x -> x
  | Fake2 _ as x -> x
  | _ -> raise (Impossible 138)
| 550 | |
(* don't touch ifdefs, done after *)
(* Like set_minus_comment, but cocci-originated tokens pass through. *)
let set_minus_comment_or_plus adj x =
  match x with
  | Cocci2 _ | C2 _ | Comma _ | Indent_cocci2
  | Unindent_cocci2 _ | EatSpace2 -> x
  | _ -> set_minus_comment adj x
| 556 | |
(* True for a real token marked as removed. *)
let is_minus t =
  match t with
  | T2 (_, Min _, _, _) -> true
  | _ -> false
| 560 | |
(* Remove all minus-marked tokens from the stream. *)
let drop_minus xs = exclude is_minus xs
| 563 | |
(* Remove tokens that come from macro expansion. *)
let drop_expanded xs =
  let expanded = function
    | T2 (t,_,_,_) -> TH.is_expanded t
    | _ -> false in
  exclude expanded xs
| 569 | |
(* Remove the Fake2 placeholder tokens. *)
let drop_fake xs =
  exclude (function Fake2 _ -> true | _ -> false) xs
| 575 | |
(* Main cleanup pass over the token2 stream: drop expanded tokens, sweep
 * the comments/whitespace lying between adjacent minus tokens into the
 * deletion, tidy blank lines around braces, normalize ifdef regions, and
 * finally drop everything marked minus. The passes are applied in a fixed
 * order; each one is order-sensitive. *)
let remove_minus_and_between_and_expanded_and_fake xs =

  (* get rid of expanded tok *)
  let xs = drop_expanded xs in

  let minus_or_comment x =
    is_minus x or is_minusable_comment x in

  let minus_or_comment_nocpp x =
    is_minus x or is_minusable_comment_nocpp x in

  (* Two minus tokens belong to the same removal when their adjacency
     matches and their witness-tree index sets intersect. *)
  let common_adj (index1,adj1) (index2,adj2) =
    let same_adj = (* same adjacency info *)
      match (adj1,adj2) with
      | (Ast_cocci.ADJ adj1,Ast_cocci.ADJ adj2) -> adj1 = adj2
      | (Ast_cocci.ALLMINUS,_) | (_,Ast_cocci.ALLMINUS) -> true in
    same_adj &&
    (* non-empty intersection of witness trees *)
    not ((inter_set index1 index2) = []) in

  (* new idea: collects regions not containing non-space context code
     if two adjacent adjacent minus tokens satisfy common_adj then delete
     all spaces, comments etc between them
     if two adjacent minus tokens do not satisfy common_adj only delete
     the spaces between them if there are no comments, etc.
     if the region contain no plus code and is both preceded and followed
     by a newline, delete the initial newline. *)

  let rec adjust_around_minus = function
    | [] -> []
    | (T2(Parser_c.TCommentNewline c,_b,_i,_h) as x)::
      ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
        (* a newline immediately before a minus region *)
        let (minus_list,rest) = span not_context (t1::xs) in
        let contains_plus = List.exists is_plus minus_list in
        let x =
          (* if the region is pure minus/comments and ends in a newline,
             remove the leading newline too *)
          match List.rev minus_list with
          | (T2(Parser_c.TCommentNewline c,_b,_i,_h))::rest
            when List.for_all minus_or_comment minus_list ->
              set_minus_comment_or_plus adj1 x
          | _ -> x in
        x :: adjust_within_minus contains_plus minus_list
        @ adjust_around_minus rest
    | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
        let (minus_list,rest) = span not_context (t1::xs) in
        let contains_plus = List.exists is_plus minus_list in
        adjust_within_minus contains_plus minus_list
        @ adjust_around_minus rest
    | x::xs ->
        x :: adjust_around_minus xs
  and adjust_within_minus cp (* contains plus *) = function
    | ((Fake2(Min adj1) | T2(_,Min adj1,_,_)) as t1)::xs ->
        let not_minus = function T2(_,Min _,_,_) -> false | _ -> true in
        let (not_minus_list,rest) = span not_minus xs in
        t1 ::
        (match rest with
        | ((Fake2(Min adj2) | T2(_,Min adj2,_,_)) as t2)::xs ->
            (* the two minus tokens are related, or only whitespace lies
               between them and there is no plus code: sweep it away *)
            if common_adj adj1 adj2
               || not cp && List.for_all is_whitespace not_minus_list
            then
              (List.map (set_minus_comment_or_plus adj1) not_minus_list)
              @ (adjust_within_minus cp (t2::xs))
            else
              not_minus_list
              @ (adjust_within_minus cp (t2::xs))
        | _ ->
            if cp
            then xs
            else
              (* remove spaces after removed stuff, eg a comma after a
                 function argument *)
              (let (spaces,rest) = span is_space xs in
               (List.map (set_minus_comment_or_plus adj1) spaces)
               @ rest)
        )
    | xs -> failwith "should always start with minus"
  and not_context = function
    | (T2(_,Ctx,_,_) as x) when not (is_minusable_comment x) -> false
    | _ -> true
  and is_plus = function
    | C2 _ | Comma _ | Cocci2 _ -> true
    | _ -> false in

  let xs = adjust_around_minus xs in

  (* get rid of fake tok *)
  let xs = drop_fake xs in

  (* this drops blank lines after a brace introduced by removing code *)
  let minus_or_comment_nonl = function
    | T2(_,Min adj,_,_) -> true
    | T2(Parser_c.TCommentNewline _,_b,_i,_h) -> false
    | x -> is_minusable_comment x in

  let rec adjust_after_brace = function
    | [] -> []
    | ((T2(_,Ctx,_,_)) as x)::((T2(_,Min adj,_,_)::_) as xs)
      when str_of_token2 x =$= "{" ->
        let (between_minus,rest) = span minus_or_comment_nonl xs in
        let (newlines,rest) = span is_whitespace rest in
        (* keep only the final newline; the preceding blank lines go *)
        let (drop_newlines,last_newline) =
          let rec loop = function
            | [] -> ([],[])
            | ((T2(Parser_c.TCommentNewline _,_b,_i,_h)) as x) :: rest ->
                (List.rev rest,[x])
            | x::xs ->
                let (drop_newlines,last_newline) = loop xs in
                (drop_newlines,x::last_newline) in
          loop (List.rev newlines) in
        x :: between_minus
        @ List.map (set_minus_comment adj) drop_newlines
        @ last_newline
        @ adjust_after_brace rest
    | x::xs -> x :: (adjust_after_brace xs) in

  let xs = adjust_after_brace xs in

  (* search backwards from context } over spaces until reaching a newline.
     then go back over all minus code until reaching some context or + code.
     get rid of all intervening spaces, newlines, and comments
     input is reversed *)
  let rec adjust_before_brace = function
    | [] -> []
    | ((T2(t,Ctx,_,_)) as x)::xs
      when str_of_token2 x =$= "}" or is_newline x ->
        let (outer_spaces,rest) = span is_space xs in
        x :: outer_spaces @
        (match rest with
        | ((T2 (Parser_c.TCommentNewline _,Ctx,_i,_h)) as h) ::
          (* the rest of this code is the same as from_newline below
             but merging them seems to be error prone... *)
          ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
            let (spaces,rest) = span minus_or_comment_nocpp rest in
            h :: m ::
            (List.map (set_minus_comment adj) spaces) @
            (adjust_before_brace rest)
        | _ -> adjust_before_brace rest
        )
    | x::xs -> x :: (adjust_before_brace xs) in

  (* entry point for the reversed stream: handle a trailing minus region
     before delegating to adjust_before_brace *)
  let from_newline = function
    | ((T2 (t, Min adj, idx, hint)) as m) :: rest ->
        let (spaces,rest) = span minus_or_comment_nocpp rest in
        m ::
        (List.map (set_minus_comment adj) spaces) @
        (adjust_before_brace rest)
    | ((T2 (t0,Ctx, idx0,h0)) as m0) ::
      ((T2 (t,Min adj,idx,h)) as m) :: rest
      when TH.str_of_tok t0 = "" ->
        (* This is for the case of a #define that is completely deleted,
           because a #define has a strange EOL token at the end.
           We hope there i no other kind of token that is represented by
           "", but it seems like changing the kind of token might break
           the end of entity recognition in the C parser.
           See parsing_hacks.ml *)
        let (spaces,rest) = span minus_or_comment_nocpp rest in
        m0 :: m ::
        (List.map (set_minus_comment adj) spaces) @
        (adjust_before_brace rest)
    | rest -> adjust_before_brace rest in

  let xs = List.rev (from_newline (List.rev xs)) in

  (* Un-minus partially-deleted ifdef regions so the keywords stay paired:
     an ifdef/else/endif set is only deleted when ALL its keywords are. *)
  let cleanup_ifdefs toks =
    (* TODO: these functions are horrid, but using tokens caused circularity *)
    let is_ifdef = function
      | T2((Parser_c.TCommentCpp
              (Token_c.CppIfDirective Token_c.IfDef, _)),m,idx,_) -> true
      | T2((Parser_c.TCommentCpp
              (Token_c.CppIfDirective Token_c.IfDef0, _)),m,idx,_) -> true
      | t -> false in
    let is_else = function
      | T2((Parser_c.TCommentCpp
              (Token_c.CppIfDirective Token_c.Else, _)),m,idx,_) -> true
      | _ -> false in
    let is_endif = function
      | T2((Parser_c.TCommentCpp
              (Token_c.CppIfDirective Token_c.Endif, _)),m,idx,_) -> true
      | _ -> false in
    let add t = function
      | l::rest -> (t::l)::rest
      | _ -> failwith "not possible" in
    (* collect the keywords and the code segments of one ifdef region;
       the stack tracks nested ifdefs *)
    let rec parse_ifdef acc_keywords acc_code stack = function
      | [] -> (None,acc_keywords,acc_code)
      | t::rest when is_else t ->
          (match stack with
          | [] -> parse_ifdef (t::acc_keywords) ([]::acc_code) stack rest
          | _ -> parse_ifdef acc_keywords (add t acc_code) stack rest
          )
      | t::rest when is_endif t ->
          (match stack with
          | [] -> ((Some (t,rest)),acc_keywords,acc_code)
          | _::stack -> parse_ifdef acc_keywords (add t acc_code) stack rest
          )
      | t::rest when is_ifdef t ->
          parse_ifdef acc_keywords (add t acc_code) (()::stack) rest
      | t::rest -> parse_ifdef acc_keywords (add t acc_code) stack rest in
    let unminus = function
      | T2 (t,Min adj,idx,hint) -> T2 (t,Ctx,idx,hint)
      | x -> x in
    let rec loop = function
      | [] -> []
      | t::rest when is_ifdef t ->
          let (ender,acc_keywords,acc_code) =
            parse_ifdef [t] [[]] [] rest in
          let acc_code = List.map loop acc_code in
          let merge = (* args reversed *)
            List.fold_left2
              (fun prev kwd code -> kwd :: (List.rev code) @ prev)
              [] in
          (match ender with
          | None -> merge (List.map unminus acc_keywords) acc_code
          | Some(endif,rest) ->
              let rest = loop rest in
              if List.for_all is_minus (endif :: acc_keywords)
              then (merge acc_keywords acc_code) @ (endif :: rest)
              else
                (merge (List.map unminus acc_keywords) acc_code) @
                ((unminus endif) :: rest)
          )
      | x::xs -> x :: loop xs in
    loop toks in

  let xs = cleanup_ifdefs xs in
  let xs = drop_minus xs in
  xs
| 801 | |
| 802 | (* things that should not be followed by space - boundary between SmPL |
| 803 | code and C code *) |
let adjust_eat_space toks =
  (* An EatSpace2 marker is never printed; it also swallows at most one
     immediately-following space token. *)
  let rec process = function
    | [] -> []
    | EatSpace2 :: tl ->
        (match tl with
        | spc :: tl' when is_space spc -> process tl'
        | _ -> process tl)
    | tok :: tl -> tok :: process tl in
  process toks
| 811 | |
| 812 | (* normally, in C code, a semicolon is not preceded by a space or newline *) |
(* Walk the token list backwards (via List.rev); after each context or
   cocci ";", ")" or "," (which, reversed, means *before* it in the
   output), drop the preceding spaces — but only when some minus (deleted)
   code sits there, so that untouched code keeps its layout. *)
let adjust_before_semicolon toks =
  let toks = List.rev toks in
  let rec search_semic = function
    | [] -> []
    | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
      when List.mem (str_of_token2 x) [";";")";","] ->
        x :: search_semic (search_minus false xs)
    | x::xs -> x :: search_semic xs
  (* [seen_minus] records whether any deleted token was crossed; the
     spaces skipped by [span] are dropped only in that case *)
  and search_minus seen_minus xs =
    let (_spaces, rest) = span is_space xs in
    (* only delete spaces if something is actually deleted *)
    match rest with
    | ((T2(_,Min _,_,_)) as a)::rerest -> a :: search_minus true rerest
    | _ -> if seen_minus then rest else xs in
  List.rev (search_semic toks)
| 828 | |
| 829 | (* normally, in C code, a ( is not followed by a space or newline *) |
(* After a context or cocci "(", drop the following whitespace when the
   code that follows it is deleted (minus), so no stray space is left
   after the parenthesis in the output. *)
let adjust_after_paren toks =
  let rec search_paren = function
    | [] -> []
    | ((T2(_,Ctx,_,_) | Cocci2 _) as x)::xs
      when List.mem (str_of_token2 x) ["("] (* other things? *) ->
        x :: search_paren (search_minus false xs)
    | x::xs -> x :: search_paren xs
  (* [seen_minus] records whether any deleted token was crossed; the
     whitespace skipped by [span] is dropped only in that case *)
  and search_minus seen_minus xs =
    let (_spaces, rest) = span is_whitespace xs in
    (* only delete spaces if something is actually deleted *)
    match rest with
    | ((T2(_,Min _,_,_)) as a)::rerest -> (* minus *)
        a :: search_minus true rerest
    | ((T2(_,Ctx,_,_)) as a)::rerest
      when seen_minus && str_of_token2 a = "," ->
        (* comma after ( will be deleted, so consider it as minus code
           already *)
        a :: search_minus true rerest
    | _ -> if seen_minus then rest else xs in (* drop trailing space *)
  search_paren toks
| 850 | |
| 851 | (* this is for the case where braces are added around an if branch *) |
let paren_then_brace toks =
  (* When an added "{" ends up on the line after a context ")", and is
     itself followed by more added code, pull the "{" back up so it sits
     on the same line as the ")". *)
  let hoist_brace tl =
    let (sps, after_sps) = span is_space tl in
    let (newlines, after_nls) = span is_newline after_sps in
    match after_nls with
    (* move the brace up to the previous line *)
    | (Cocci2 ("{", _, _, _, _) as brace) :: ((Cocci2 _ :: _) as tail) ->
        sps @ (brace :: (newlines @ tail))
    | _ -> tl in
  let rec scan = function
    | [] -> []
    | (T2 (_, Ctx, _, _) as tok) :: tl
      when List.mem (str_of_token2 tok) [")"] ->
        tok :: scan (hoist_brace tl)
    | tok :: tl -> tok :: scan tl in
  scan toks
| 868 | |
| 869 | let is_ident_like s = s ==~ regexp_alpha |
| 870 | |
(* Remove spaces that would otherwise trail at the end of a line: either a
   tentative C2 " " emitted by unparse_cocci right before a space/newline,
   or a context space whose following code up to the newline is entirely
   minus (deleted). *)
let rec drop_space_at_endline = function
  | [] -> []
  | [x] -> [x]
  | (C2 " ") ::
    ((((T2(Parser_c.TCommentSpace _,Ctx,_,_)) | Cocci2("\n",_,_,_,_) |
       (T2(Parser_c.TCommentNewline _,Ctx,_,_))) :: _) as rest) ->
      (* when unparse_cocci doesn't know whether space is needed *)
      drop_space_at_endline rest
  | ((T2(Parser_c.TCommentSpace _,Ctx,_i,_h)) as a)::rest ->
      let (outer_spaces,rest) = span is_space rest in
      (* tokens allowed between the space and the end of line without
         blocking the drop: deleted code and further context spaces
         (a context newline ends the scan) *)
      let minus_or_comment_or_space_nocpp = function
        | T2(_,Min _,_,_) -> true
        | (T2(Parser_c.TCommentSpace _,Ctx,_i,_)) -> true
        | (T2(Parser_c.TCommentNewline _,Ctx,_i,_)) -> false
        | _ -> false in
      let (minus,rest) = span minus_or_comment_or_space_nocpp rest in
      (* keep everything unchanged when nothing is actually deleted *)
      let fail _ = a :: outer_spaces @ minus @ (drop_space_at_endline rest) in
      if List.exists is_minus minus
      then
        match rest with
        | ((T2(Parser_c.TCommentNewline _,Ctx,_i,_h)) as a)::rest ->
            (* drop trailing spaces *)
            minus @ a :: (drop_space_at_endline rest)
        | _ -> fail ()
      else fail ()
  | a :: rest ->
      a :: drop_space_at_endline rest
| 898 | |
| 899 | (* if a removed ( is between two tokens, then add a space *) |
(* if a removed ( is between two tokens, then add a space *)
let rec paren_to_space = function
  | [] -> []
  | [x] -> [x]
  | [x;y] -> [x;y]
  | (T2 (_, Ctx, _, _) as before) ::
    (T2 (t, Min _, _, _) as removed) ::
    (T2 (_, Ctx, _, _) as after) :: tl
    when TH.str_of_tok t = "(" && not (is_whitespace before) ->
      (* the deleted paren used to separate two context tokens: keep a
         space so they do not get glued together *)
      before :: removed :: C2 " " :: paren_to_space (after :: tl)
  | tok :: tl -> tok :: paren_to_space tl
| 910 | |
(* Insert spaces between adjacent tokens where needed so that, e.g., two
   identifiers do not end up glued together.  In SMPL spacing mode the
   line/column positions recorded in Cocci2 tokens are reproduced instead.
   Fixes: deprecated [or] keyword replaced by [||]. *)
let rec add_space xs =
  match xs with
  | [] -> []
  | [x] -> [x]
  | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
    when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
      not (lnx = -1) && not (rcolx = -1) && lnx = lny && rcolx < lcoly ->
      (* same line: pad to the recorded column.  this only works within a
         line. could consider whether something should be done to add
         newlines too, rather than printing them explicitly in
         unparse_cocci. *)
      x::C2 (String.make (lcoly-rcolx) ' ')::add_space (y::xs)
  | (Cocci2(sx,lnx,_,rcolx,_) as x)::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
    when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL &&
      not (lnx = -1) && not (rcolx = -1) && lnx < lny ->
      (* later line: emit the missing newlines plus leading indentation *)
      x::C2 (String.make (lny-lnx) '\n')::
      C2 (String.make (lcoly-1) ' ')::  (* -1 is for the + *)
      add_space (y::xs)
  | ((T2(_,Ctx,_,_)) as x)::((Cocci2 _) as y)::xs -> (* add space on boundary *)
      let sx = str_of_token2 x in
      let sy = str_of_token2 y in
      if is_ident_like sx && (is_ident_like sy || List.mem sy ["="])
      then x::C2 " "::(add_space (y::xs))
      else x::(add_space (y::xs))
  | x::y::xs -> (* not boundary, not sure if it is possible *)
      let sx = str_of_token2 x in
      let sy = str_of_token2 y in
      if is_ident_like sx && is_ident_like sy
      then x::C2 " "::(add_space (y::xs))
      else x::(add_space (y::xs))
| 943 | |
| 944 | (* A fake comma is added at the end of an unordered initlist or a enum |
| 945 | decl, if the initlist or enum doesn't already end in a comma. This is only |
| 946 | needed if there is + code, ie if we see Cocci after it in the code sequence *) |
| 947 | |
(* Keep a fake trailing Comma only when + code (Cocci2) follows it after
   possible whitespace; otherwise drop it. *)
let rec drop_end_comma = function
  | [] -> []
  | [x] -> [x]
  | ((Comma ",") as x) :: rest ->
      (* look past whitespace to see whether added code follows *)
      let (_newlines,rest2) = span is_whitespace rest in
      (match rest2 with
      | (Cocci2 _) :: _ -> x :: drop_end_comma rest
      | _ -> drop_end_comma rest
      )
  | x :: xs -> x :: drop_end_comma xs
| 958 | |
| 959 | (* The following only works for the outermost function call. Stack records |
| 960 | the column of all open parentheses. Space_cell contains the most recent |
| 961 | comma in the outermost function call. The goal is to decide whether this |
| 962 | should be followed by a space or a newline and indent. *) |
let add_newlines toks tabbing_unit =
  (* the following is for strings that may contain newline or tabs *)
  (* running column count: a tab counts as 8, a newline resets to 0 *)
  let string_length s count =
    let l = list_of_string s in
    List.fold_left
      (function count ->
        function
          | '\t' -> count + 8
          | '\n' -> 0
          | c -> count + 1)
      count l in
  (* build an indentation string reaching column n, made of tabbing units
     padded with spaces *)
  let create_indent n =
    let (tu,tlen) =
      match tabbing_unit with
      | Some ("\t",_) -> ("\t",8)
      | Some ("",_) -> ("\t",8) (* not sure why... *)
      | Some (s,_) -> (s,string_length s 0) (* assuming only tabs or spaces *)
      | None -> ("\t",8) in
    let rec loop seen =
      if seen + tlen <= n
      then tu ^ loop (seen + tlen)
      else String.make (n-seen) ' ' in
    loop 0 in
  (* if the line exceeds max_width, rewrite the most recent breakable
     space (the mutable space_cell) into a newline + indent to column x;
     returns the resulting column, or None if no break was made *)
  let check_for_newline count x = function
    | Some (start,space_cell) when count > Flag_parsing_c.max_width ->
        space_cell := "\n"^(create_indent x);
        Some (x + (count - start))
    | _ -> None in
  (* entering a ( or {: push the current column on the stack *)
  let start_box stack space_cell count s =
    let count = string_length s count in
    (count,count::stack,space_cell) in
  (* leaving a ) or }: pop the stack; at the outermost level, possibly
     break the line and forget the pending space cell *)
  let end_box stack space_cell count s =
    (* this assumes that start_box and end_box are matched, but this is not
       necessarily the case, if ( is modified and ) is context code *)
    let count = string_length s count in
    match stack with
    | [x] ->
        (match check_for_newline count x space_cell with
        (* NOTE(review): both branches clear the stack and space cell;
           only the column differs when a break occurred *)
        | Some count -> (count,[],None)
        | None -> (count,[],None)
        )
    | [] -> (count,stack,space_cell)
    | _ -> (count,List.tl stack,space_cell) in
  (* main scan: info = (stack of open-paren columns, most recent
     breakable space in the outermost call); count = current column *)
  let rec loop ((stack,space_cell) as info) count = function
    | [] -> []
    | ((Cocci2(s,line,lcol,rcol,hint)) as a):: (* hint can't be start *)
      (T2(commatok,Ctx,idx,_))::
      (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,_,_)) ::
      (((T2(codetok,Ctx,_,_)) :: _) as xs)
      when
        (TH.str_of_tok commatok) = "," &&
        (TH.str_of_tok sptok) = " " &&
        ((List.length stack = 1) or (* not super elegant... *)
        (* check if the Cocci2 token is a ), need double treatment *)
        (List.length stack = 2) && (hint = Some Unparse_cocci.EndBox)) ->
        (* something large added before a comma *)
        let stack = (* do the work of end_box in the length 2 case *)
          if List.length stack = 2
          then List.tl stack
          else stack in
        let x = List.hd stack in
        let sp = ref " " in
        let cocci_count = string_length s count in
        let space_cell = Some (cocci_count+1,sp) in (* count before space *)
        let newcount = cocci_count + 2 in (* count including space *)
        let future_count =
          string_length (TH.str_of_tok codetok) newcount in
        (* re-tag the comma so redo_spaces can materialize the separator *)
        let b = T2(commatok,Ctx,idx,
                   Some (Unparse_cocci.SpaceOrNewline sp)) in
        (match check_for_newline future_count x space_cell with
        | Some count -> a :: b :: loop (stack,Some (x,sp)) count xs
        | None -> a :: b :: loop (stack,Some (newcount,sp)) newcount xs
        )
    | (T2(commatok,Ctx,_,_))::
      (T2(((Parser_c.TCommentSpace _) as sptok),Ctx,idx,_)) ::
      (((Cocci2(s,line,lcol,rcol,hint))::_) as xs)
      when (TH.str_of_tok commatok) = "," && (TH.str_of_tok sptok) = " " &&
        List.length stack = 1 (* not super elegant... *) ->
        (* something large added after a comma *)
        let x = List.hd stack in
        let sp = ref " " in
        let space_cell = Some (count+1,sp) in (* count before space *)
        let newcount = count + 2 in (* count including space *)
        let future_count = string_length s newcount in
        let a = T2(commatok,Ctx,idx,
                   Some (Unparse_cocci.SpaceOrNewline sp)) in
        (match check_for_newline future_count x space_cell with
        | Some count -> a :: loop (stack,Some (x,sp)) count xs
        | None -> a :: loop (stack,Some (newcount,sp)) newcount xs
        )
    | ((T2(tok,Ctx,idx,_)) as a)::xs ->
        (* let (stack,space_cell) = info in *)
        (match TH.str_of_tok tok with
        | "=" as s ->
            (* "= {" opens an initializer box *)
            let (spaces,rest) = span is_space xs in
            (match rest with
            | ((T2(tok,Ctx,_,_)) as b)::ixs ->
                (match TH.str_of_tok tok with
                | "{" ->
                    (* account for the "=" and spaces before the "{" *)
                    let newcount =
                      List.fold_left
                        (function prev ->
                          function
                            | (T2(tok,_b,_i,_h)) ->
                                string_length (TH.str_of_tok tok) prev
                            | _ -> failwith "not possible")
                        count spaces in
                    let front = a :: spaces @ [b] in
                    let (newcount,newstack,newspacecell) =
                      start_box stack space_cell newcount "{" in
                    front @ loop (newstack,newspacecell) newcount ixs
                | s -> a :: loop info (string_length s count) xs
                )
            | _ -> a :: loop info (string_length s count) xs
            )
        | "(" as s ->
            let (newcount,newstack,newspacecell) =
              start_box stack space_cell count s in
            a :: loop (newstack,newspacecell) newcount xs
        | ")" as s ->
            let (newcount,newstack,newspacecell) =
              end_box stack space_cell count s in
            a :: loop (newstack,newspacecell) newcount xs
        | "{" as s when not (stack = []) ->
            (* [] case means statement braces *)
            let (newcount,newstack,newspacecell) =
              start_box stack space_cell count s in
            a :: loop (newstack,newspacecell) newcount xs
        | "}" as s when not (stack = []) ->
            (* [] case means statement braces *)
            let (newcount,newstack,newspacecell) =
              end_box stack space_cell count s in
            a :: loop (newstack,newspacecell) newcount xs
        | s -> a :: loop info (string_length s count) xs
        )
    | ((Cocci2(s,line,lcol,rcol,hint)) as a)::xs ->
        let (stack,space_cell) = info in
        let rest =
          match hint with
          | None -> loop info (string_length s count) xs
          | Some Unparse_cocci.StartBox ->
              let (newcount,newstack,newspacecell) =
                start_box stack space_cell count s in
              loop (newstack,newspacecell) newcount xs
          | Some Unparse_cocci.EndBox ->
              let (newcount,newstack,newspacecell) =
                end_box stack space_cell count s in
              loop (newstack,newspacecell) newcount xs
          | Some (Unparse_cocci.SpaceOrNewline sp) ->
              let count = string_length s (count + 1 (*space*)) in
              (match stack with
              | [x] ->
                  (match check_for_newline count x space_cell with
                  | Some count -> loop (stack,Some (x,sp)) count xs
                  | None -> loop (stack,Some (count,sp)) count xs
                  )
              | _ -> loop info count xs
              ) in
        a :: rest
    | ((T2(tok,_,_,_)) as a)::xs ->
        a :: loop info (string_length (TH.str_of_tok tok) count) xs
    | ((C2(s)) as a)::xs -> a :: loop info (string_length s count) xs
    | ((Comma(s)) as a)::xs -> a :: loop info (string_length s count) xs
    | Fake2 _ :: _ | Indent_cocci2 :: _
    | Unindent_cocci2 _::_ | EatSpace2::_ ->
        failwith "unexpected fake, indent, unindent, or eatspace" in
  (* second pass: turn each SpaceOrNewline hint into the separator that
     was finally decided (a plain space, or newline + indentation) *)
  let redo_spaces prev = function
    | Cocci2(s,line,lcol,rcol,Some (Unparse_cocci.SpaceOrNewline sp)) ->
        C2 !sp :: Cocci2(s,line,lcol,rcol,None) :: prev
    | T2(tok,min,idx,Some (Unparse_cocci.SpaceOrNewline sp)) ->
        C2 !sp :: T2(tok,min,idx,None) :: prev
    | t -> t::prev in
  (match !Flag_parsing_c.spacing with
  | Flag_parsing_c.SMPL -> toks
  | _ -> List.rev (List.fold_left redo_spaces [] (loop ([],None) 0 toks))
  )
| 1139 | |
| 1140 | (* When insert some new code, because of a + in a SP, we must add this |
| 1141 | * code at the right place, with the good indentation. So each time we |
| 1142 | * encounter some spacing info, with some newline, we maintain the |
| 1143 | * current indentation level used. |
| 1144 | * |
| 1145 | * TODO problems: not accurate. ex: TODO |
| 1146 | * |
| 1147 | * TODO: if in #define region, should add a \ \n |
| 1148 | *) |
let new_tabbing2 space =
  (* keep only the characters of [space] that follow its last newline,
     i.e. the indentation of the current line *)
  let rev_chars = List.rev (list_of_string space) in
  let rev_indent = take_until (fun c -> c =<= '\n') rev_chars in
  String.concat "" (List.map string_of_char (List.rev rev_indent))
| 1156 | |
(* profiled wrapper around new_tabbing2 *)
let new_tabbing a =
  profile_code "C unparsing.new_tabbing" (fun () -> new_tabbing2 a)
| 1159 | |
| 1160 | |
let rec adjust_indentation xs =

  (* stack of indentation strings currently in force for + code *)
  let _current_tabbing = ref ([] : string list) in
  (* the inferred indentation increment, as (string, reversed char list) *)
  let tabbing_unit = ref None in

  let string_of_list l = String.concat "" (List.map string_of_char l) in

  (* try to pick a tabbing unit for the plus code *)
  let adjust_tabbing_unit old_tab new_tab =
    (* infer only once, and only when indentation has grown: the unit is
       the suffix of new_tab beyond the length of old_tab *)
    if !tabbing_unit =*= None && String.length new_tab > String.length old_tab
    then
      let old_tab = list_of_string old_tab in
      let new_tab = list_of_string new_tab in
      let rec loop = function
        | ([],new_tab) ->
            tabbing_unit := Some(string_of_list new_tab,List.rev new_tab)
        | (_,[]) -> failwith "not possible"
        | (o::os,n::ns) -> loop (os,ns) in (* could check for equality *)
      loop (old_tab,new_tab) in

(*
  let remtab tu current_tab =
    let current_tab = List.rev(list_of_string current_tab) in
    let rec loop = function
      ([],new_tab) -> string_of_list (List.rev new_tab)
      | (_,[]) -> (-*weird; tabbing unit used up more than the current tab*-)
        ""
      | (t::ts,n::ns) when t =<= n -> loop (ts,ns)
      | (_,ns) -> (-* mismatch; remove what we can *-)
          string_of_list (List.rev ns) in
    loop (tu,current_tab) in
*)

  (* fallback inference: take the tabbing of the first newline found after
     the first "{" as the tabbing unit *)
  let rec find_first_tab started = function
    | [] -> ()
    | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
        find_first_tab true xs
    (* patch: coccinelle *)
    | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::_
      when started ->
        let s = str_of_token2 x +> new_tabbing in
        tabbing_unit := Some (s,List.rev (list_of_string s))
    | x::xs -> find_first_tab started xs in
  find_first_tab false xs;

  (* true when the parentheses in the given tokens balance out (extra
     closes allowed); used to recognize newlines that really end a line *)
  let rec balanced ct = function
    | [] -> ct >= 0
    | ((T2(tok,_,_,_)) as x)::xs ->
        (match str_of_token2 x with
        | "(" -> balanced (ct+1) xs
        | ")" -> balanced (ct-1) xs
        | _ -> balanced ct xs
        )
    | x::xs -> balanced ct xs in

  (* record the indentation carried by newline token x as the current
     tabbing, and possibly refine the tabbing unit from the change *)
  let update_tabbing started s x =
    let old_tabbing = !_current_tabbing in
    str_of_token2 x +> new_tabbing +> (fun s -> _current_tabbing := [s]);
    (* only trust the indentation after the first { *)
    if started
    then
      adjust_tabbing_unit
        (String.concat "" old_tabbing)
        (String.concat "" !_current_tabbing) in

  (* main pass; [started] is true once the first "{" (function body) has
     been seen, so that indentation information can be trusted *)
  let rec aux started xs =
    match xs with
    | [] -> []
    (* patch: coccinelle *)
    | ((T2 (Parser_c.TCommentNewline s,_,_,_)) as x)::
      Unindent_cocci2(false)::xs ->
        update_tabbing started s x;
        (C2 "\n")::aux started xs
    | (Cocci2("\n",_,_,_,_))::Unindent_cocci2(false)::xs ->
        (C2 "\n")::aux started xs
    | ((T2 (tok,_,_,_)) as x)::(T2 (Parser_c.TCommentNewline s, _, _, _))::
      ((Cocci2 ("{",_,_,_,_)) as a)::xs
      when started && str_of_token2 x =$= ")" ->
        (* to be done for if, etc, but not for a function header *)
        x::(C2 " ")::a::(aux started xs)
    | ((T2 (Parser_c.TCommentNewline s, _, _, _)) as x)::xs
      when
        balanced 0 (fst(span (function x -> not(is_newline x)) xs)) ->
        update_tabbing started s x;
        let coccis_rest = span all_coccis xs in
        (match coccis_rest with
        | (_::_,((T2 (tok,_,_,_)) as y)::_) when str_of_token2 y =$= "}" ->
            (* the case where cocci code has been added before a close } *)
            x::aux started (Indent_cocci2::xs)
        | _ -> x::aux started xs
        )
    | Indent_cocci2::((Cocci2(sy,lny,lcoly,_,_)) as y)::xs
      when !Flag_parsing_c.spacing = Flag_parsing_c.SMPL ->
        (* SMPL spacing mode: indent to the column recorded in the SmPL *)
        let tu = String.make (lcoly-1) ' ' in
        _current_tabbing := tu::(!_current_tabbing);
        C2 (tu)::aux started (y::xs)
    | Indent_cocci2::xs ->
        (match !tabbing_unit with
        | None -> aux started xs
        | Some (tu,_) ->
            _current_tabbing := tu::(!_current_tabbing);
            (* can't be C2, for later phases *)
            Cocci2 (tu,-1,-1,-1,None)::aux started xs
        )
    | Unindent_cocci2(permanent)::((Cocci2("\n",_,_,_,_)) as x)::xs ->
        (* seems only relevant if there is a following cocci newline *)
        (match !_current_tabbing with
        | [] -> aux started xs
        | _::new_tabbing ->
            let s = String.concat "" new_tabbing in
            _current_tabbing := new_tabbing;
            x::Cocci2 (s,-1,-1,-1,None)::aux started xs
        )
    | Unindent_cocci2(permanent)::xs -> aux started xs
    (* border between existing code and cocci code *)
    | ((T2 (tok,_,_,_)) as x)::((Cocci2("\n",_,_,_,_)) as y)::xs
      when str_of_token2 x =$= "{" ->
        x::aux true (y::Indent_cocci2::xs)
    | ((Cocci2 _) as x)::((T2 (tok,_,_,_)) as y)::xs
      when str_of_token2 y =$= "}" ->
        x::aux started (Unindent_cocci2 true::y::xs)
    (* starting the body of the function *)
    | ((T2 (tok,_,_,_)) as x)::xs when str_of_token2 x =$= "{" ->
        x::aux true xs
    | ((Cocci2("{",_,_,_,_)) as a)::xs -> a::aux true xs
    | ((Cocci2("\n",_,_,_,_)) as x)::xs ->
        (* dont inline in expr because of weird eval order of ocaml *)
        let s = String.concat "" !_current_tabbing in
        (* can't be C2, for later phases *)
        x::Cocci2 (s,-1,-1,-1,None)::aux started xs
    | x::xs -> x::aux started xs in
  (aux false xs,!tabbing_unit)
| 1293 | |
| 1294 | |
(* Mark for removal the commas that directly follow a "(" or another ",",
   and the commas directly followed by a ")" — unless the two tokens were
   already adjacent in the original file (consecutive idx positions), in
   which case the code was written that way and is left untouched. *)
let rec find_paren_comma = function
  | [] -> ()

  (* do nothing if was like this in original file *)
  | { str = "("; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs)
  | { str = ","; idx = Some p1 } :: ({ str = ","; idx = Some p2} :: _ as xs)
  | { str = ","; idx = Some p1 } :: ({ str = ")"; idx = Some p2} :: _ as xs)
    when p2 =|= p1 + 1 ->
      find_paren_comma xs

  (* otherwise yes can adjust *)
  | { str = "(" } :: (({ str = ","} as rem) :: _ as xs)
  | ({ str = "," } as rem) :: ({ str = ","} :: _ as xs)
  | ({ str = "," } as rem) :: ({ str = ")"} :: _ as xs) ->
      rem.remove <- true;
      find_paren_comma xs

  | _::xs ->
      find_paren_comma xs
| 1314 | |
| 1315 | |
let fix_tokens toks =
  let extended = List.map mk_token_extended toks in

  (* comma adjustment is computed on the stream without real comments,
     but ifdef tokens are deliberately kept in *)
  let significant =
    exclude
      (function
        | {tok2 = T2 (t,_,_,_)} -> TH.is_real_comment t
        | _ -> false)
      extended in
  find_paren_comma significant;

  List.map (fun x -> x.tok2) (rebuild_tokens_extented extended)
| 1327 | |
| 1328 | (* if we have to remove a '}' that is alone on a line, remove the line too *) |
let drop_line toks =
  (* Scan forward keeping minus tokens, discarding spaces and fakes;
     returns (true, remaining) if a newline is reached (the newline itself
     is kept in [remaining]), (false, original) otherwise. *)
  let rec space_until_newline toks =
    match toks with
    | (T2(_, Min _, _, _) as hd) :: tl ->
        let (drop, tl) = space_until_newline tl in
        (drop, hd :: tl)
    | hd :: tl when is_space hd ->
        space_until_newline tl
    | Fake2 _ :: tl ->
        space_until_newline tl
    | hd :: tl when is_newline hd ->
        (true, toks)
    | _ ->
        (false, toks) in
  (* From a possibly-indented deleted "}", decide whether the whole line
     can go; when it can, the leading whitespace is dropped as well. *)
  let rec loop toks =
    match toks with
    | (T2(_, Min _, _, _) as x) :: tl
      when str_of_token2 x =$= "}" ->
        let (drop, tl) = space_until_newline tl in
        (drop, x :: tl)
    | hd :: tl when is_whitespace hd ->
        let (drop, tl) = loop tl in
        if drop then
          (true, tl)
        else
          (false, toks)
    | _ -> (false, toks) in
  (* apply the rewriting at every position in the token stream *)
  let rec find toks =
    let (_, toks) = loop toks in
    match toks with
    | [] -> []
    | hd :: tl ->
        hd :: find tl in
  find toks
| 1363 | |
| 1364 | (*****************************************************************************) |
| 1365 | (* Final unparsing (and debugging support) *) |
| 1366 | (*****************************************************************************) |
| 1367 | |
| 1368 | (* for debugging *) |
type kind_token2 = KFake | KCocci | KC | KExpanded | KOrigin

(* classify a token2 by its provenance, for the debug marking below *)
let kind_of_token2 = function
  | Fake2 _ -> KFake
  | Cocci2 _ -> KCocci
  | C2 _ -> KC
  | Comma _ -> KC
  | T2 (t,_,_,_) ->
      (match TH.pinfo_of_tok t with
      | Ast_c.ExpandedTok _ -> KExpanded
      | Ast_c.OriginTok _ -> KOrigin
      | Ast_c.FakeTok _ -> raise (Impossible 139) (* now a Fake2 *)
      | Ast_c.AbstractLineTok _ -> raise (Impossible 140) (* now a KC *)
      )
  | Unindent_cocci2 _ | Indent_cocci2 | EatSpace2 -> raise (Impossible 141)
| 1384 | |
(* delimiter printed in debug output when leaving a run of tokens *)
let end_mark = "!"

(* delimiter printed in debug output when entering a run of tokens of the
   given kind; origin tokens are unmarked *)
let start_mark = function
  | KFake -> "!F!"
  | KCocci -> "!S!"
  | KC -> "!A!"
  | KExpanded -> "!E!"
  | KOrigin -> ""
| 1393 | |
(* Print all the tokens with [pr].  In debug mode, each change of token
   provenance is bracketed with end_mark/start_mark delimiters.  In normal
   mode, tokens within Exclude_start/Exclude_end annotations are replaced
   by whitespace of the same extent, preserving layout while hiding text.
   Fixes: stray ";" before "else" (syntax error); deprecated mutable-string
   operations (String.copy/String.set) replaced by String.map. *)
let print_all_tokens2 pr xs =
  if !Flag_parsing_c.debug_unparsing
  then
    let current_kind = ref KOrigin in
    xs +> List.iter (fun t ->
      let newkind = kind_of_token2 t in
      if newkind =*= !current_kind
      then pr (str_of_token2 t)
      else
        begin
          pr (end_mark);
          pr (start_mark newkind);
          pr (str_of_token2 t);
          current_kind := newkind
        end
      )
  else
    (* replace every non-whitespace character by a space, keeping any
       whitespace characters so the layout is unchanged *)
    let to_whitespace s =
      String.map
        (function
          | (' ' | '\t' | '\r' | '\n') as c -> c
          | _ -> ' ')
        s in
    let hiding_level = ref 0 in
    let handle_token t =
      let s = str_of_token2 t in
      let hide_current =
        match t with
        | T2 (t,_,_,_) ->
            let i = TH.info_of_tok t in
            (* Exclude_start/Exclude_end regions may nest: count levels *)
            (match Ast_c.get_annot_info i Token_annot.Exclude_start with
            | None -> ()
            | Some _ -> hiding_level := !hiding_level + 1
            );
            let hide_current = !hiding_level > 0 in
            (match Ast_c.get_annot_info i Token_annot.Exclude_end with
            | None -> ()
            | Some _ -> hiding_level := max (!hiding_level - 1) 0
            );
            hide_current
        | _ -> !hiding_level > 0 in
      if hide_current then to_whitespace s else s in
    xs +> List.iter (fun x -> pr (handle_token x))
| 1440 | |
| 1441 | |
| 1442 | |
| 1443 | |
| 1444 | |
| 1445 | (*****************************************************************************) |
| 1446 | (* Entry points *) |
| 1447 | (*****************************************************************************) |
| 1448 | |
| 1449 | (* old: PPviatok was made in the beginning to allow to pretty print a |
| 1450 | * complete C file, including a modified C file by transformation.ml, |
| 1451 | * even if we don't handle yet in pretty_print_c.ml, ast_to_flow (and |
| 1452 | * maybe flow_to_ast) all the cases. Indeed we don't need to do some |
| 1453 | * fancy stuff when a function was not modified at all. Just need to |
| 1454 | * print the list of token as-is. But now pretty_print_c.ml handles |
| 1455 | * almost everything so maybe less useful. Maybe PPviatok allows to |
| 1456 | * optimize a little the pretty printing. |
| 1457 | * |
| 1458 | * update: now have PPviastr which goes even faster than PPviatok, so |
| 1459 | * PPviatok has disappeared. |
| 1460 | *) |
| 1461 | |
| 1462 | type ppmethod = PPnormal | PPviastr |
| 1463 | |
| 1464 | |
| 1465 | |
| 1466 | |
| 1467 | (* The pp_program function will call pretty_print_c.ml with a special |
| 1468 | * function to print the leaf components, the tokens. When we want to |
| 1469 | * print a token, we need to print also maybe the space and comments that |
| 1470 | * were close to it in the original file (and that was omitted during the |
| 1471 | * parsing phase), and honor what the cocci-info attached to the token says. |
| 1472 | * Maybe we will not print the token if it's a MINUS-token, and maybe we will |
| 1473 | * print it and also print some cocci-code attached in a PLUS to it. |
| 1474 | * So we will also maybe call unparse_cocci. Because the cocci-code may |
| 1475 | * contain metavariables, unparse_cocci will in fact sometimes call back |
| 1476 | * pretty_print_c (which will this time don't call back again unparse_cocci) |
| 1477 | *) |
| 1478 | |
(* Unparse each top-level element into [outfile].  PPnormal runs the full
   token pipeline (mcode expansion, then the filter/spacing/indentation
   phases above); PPviastr just echoes the element's original string.
   Fixes: deprecated [or] keyword replaced by [||]. *)
let pp_program2 xs outfile =
  with_open_outfile outfile (fun (pr,chan) ->
    let pr s =
      if !Flag_parsing_c.debug_unparsing
      then begin pr2_no_nl s; flush stderr end
      else pr s
      (* flush chan; *)
      (* Common.pr2 ("UNPARSING: >" ^ s ^ "<"); *)
    in

    xs +> List.iter (fun ((e,(str, toks_e)), ppmethod) ->
      (* here can still work on ast *)
      let e = remove_useless_fakeInfo_struct e in

      match ppmethod with
      | PPnormal ->
          (* now work on tokens *)
          (* phase1: just get all the tokens, all the information *)
          assert(toks_e +> List.for_all (fun t ->
            TH.is_origin t || TH.is_expanded t
          ));
          let toks = get_fakeInfo_and_tokens e toks_e in
          let toks = displace_fake_nodes toks in
          (* assert Origin;ExpandedTok;Faketok *)
          let toks = expand_mcode toks in

          (* assert Origin;ExpandedTok; + Cocci + C (was AbstractLineTok)
           * and no tag information, just NOTHING. *)

          let toks =
            if !Flag.sgrep_mode2
            then
              (* nothing else to do for sgrep *)
              drop_expanded(drop_fake(drop_minus toks))
            else
              begin
                (* phase2: can now start to filter and adjust *)
                let (toks,tu) = adjust_indentation toks in
                let toks = adjust_eat_space toks in
                let toks = adjust_before_semicolon toks in(*before remove minus*)
                let toks = adjust_after_paren toks in(*also before remove minus*)
                let toks = drop_space_at_endline toks in
                let toks = paren_to_space toks in
                let toks = drop_end_comma toks in

                let toks = drop_line toks in

                let toks = remove_minus_and_between_and_expanded_and_fake toks in
                (* assert Origin + Cocci + C and no minus *)
                let toks = add_space toks in
                let toks = add_newlines toks tu in
                let toks = paren_then_brace toks in
                let toks = fix_tokens toks in
                toks
              end in

          (* in theory here could reparse and rework the ast! or
           * apply some SP. Not before cos julia may have generated
           * not parsable file. Need do unparsing_tricks call before
           * being ready to reparse. *)
          print_all_tokens2 pr toks

      | PPviastr -> pr str
    )
  )
| 1544 | |
(* profiled entry point for unparsing; see pp_program2 *)
let pp_program a b =
  profile_code "C unparsing" (fun () -> pp_program2 a b)
| 1547 | |
| 1548 | |
(* convenience wrapper: unparse every element with the normal method *)
let pp_program_default xs outfile =
  pp_program (List.map (fun x -> (x, PPnormal)) xs) outfile