| 1 | (* Yoann Padioleau |
| 2 | * |
| 3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. |
| 4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes |
| 5 | * |
| 6 | * This program is free software; you can redistribute it and/or |
| 7 | * modify it under the terms of the GNU General Public License (GPL) |
| 8 | * version 2 as published by the Free Software Foundation. |
| 9 | * |
| 10 | * This program is distributed in the hope that it will be useful, |
| 11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of |
| 12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
| 13 | * file license.txt for more details. |
| 14 | *) |
| 15 | |
| 16 | open Common |
| 17 | |
| 18 | module TH = Token_helpers |
| 19 | |
| 20 | open Parser_c |
| 21 | |
| 22 | (*****************************************************************************) |
| 23 | (* Some debugging functions *) |
| 24 | (*****************************************************************************) |
| 25 | |
| 26 | let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing |
| 27 | |
| 28 | (* ------------------------------------------------------------------------- *) |
| 29 | (* fuzzy parsing, different "views" over the same program *) |
| 30 | (* ------------------------------------------------------------------------- *) |
| 31 | |
| 32 | |
| 33 | (* Normally I should not use ref/mutable in the token_extended type |
| 34 | * and I should have a set of functions taking a list of tokens and |
| 35 | * returning a list of tokens. The problem is that to make easier some |
| 36 | * functions, it is better to work on better representation, on "views" |
| 37 | * over this list of tokens. But then modifying those views and get |
| 38 | * back from those views to the original simple list of tokens is |
| 39 | * tedious. One way is to maintain next to the view a list of "actions" |
| 40 | * (I was using a hash storing the charpos of the token and associating |
| 41 | * the action) but it is tedious too. Simpler to use mutable/ref. We |
| 42 | * use the same idea that we use when working on the Ast_c. *) |
| 43 | |
| 44 | (* old: when I was using the list of "actions" next to the views, the hash |
| 45 | * indexed by the charpos, there could have been some problems: |
| 46 | * how my fake_pos interact with the way I tag and adjust token ? |
| 47 | * because I base my tagging on the position of the token ! so sometimes |
| 48 | * could tag another fakeInfo that should not be tagged ? |
| 49 | * fortunately I don't use anymore this technique. |
| 50 | *) |
| 51 | |
| 52 | (* update: quite close to the Place_c.Inxxx *) |
| 53 | type context = |
| 54 | InFunction | InEnum | InStruct | InInitializer | NoContext |
| 55 | |
| 56 | type token_extended = { |
| 57 | mutable tok: Parser_c.token; |
| 58 | mutable where: context; |
| 59 | |
| 60 | (* less: need also a after ? *) |
| 61 | mutable new_tokens_before : Parser_c.token list; |
| 62 | |
| 63 | (* line x col cache, more easily accessible, of the info in the token *) |
| 64 | line: int; |
| 65 | col : int; |
| 66 | } |
| 67 | |
| 68 | (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then |
| 69 | * can confuse the parser. |
| 70 | *) |
| 71 | let set_as_comment cppkind x = |
| 72 | if TH.is_eof x.tok |
| 73 | then () (* otherwise parse_c will be lost if don't find a EOF token *) |
| 74 | else |
| 75 | x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok) |
| 76 | |
| 77 | let save_as_comment cppkind x = |
| 78 | if TH.is_eof x.tok |
| 79 | then () (* otherwise parse_c will be lost if don't find a EOF token *) |
| 80 | else |
| 81 | let t = |
| 82 | match x.tok with |
| 83 | TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef |
| 84 | | TIfdefBool _ -> Token_c.IfDef0 |
| 85 | | TIfdefelse _ | TIfdefelif _ -> Token_c.Else |
| 86 | | TEndif _ -> Token_c.Endif |
| 87 | | _ -> Token_c.Other in |
| 88 | x.tok <- TCommentCpp (cppkind t, TH.info_of_tok x.tok) |
| 89 | |
| 90 | let mk_token_extended x = |
| 91 | let (line, col) = TH.linecol_of_tok x in |
| 92 | { tok = x; |
| 93 | line = line; col = col; |
| 94 | where = NoContext; |
| 95 | new_tokens_before = []; |
| 96 | } |
| 97 | |
| 98 | |
| 99 | let rebuild_tokens_extented toks_ext = |
| 100 | let _tokens = ref [] in |
| 101 | toks_ext +> List.iter (fun tok -> |
| 102 | tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens); |
| 103 | push2 tok.tok _tokens |
| 104 | ); |
| 105 | let tokens = List.rev !_tokens in |
| 106 | (tokens +> acc_map mk_token_extended) |
| 107 | |
| 108 | |
| 109 | |
| 110 | (* x list list, because x list separated by ',' *) |
| 111 | type paren_grouped = |
| 112 | | Parenthised of paren_grouped list list * token_extended list |
| 113 | | PToken of token_extended |
| 114 | |
| 115 | type brace_grouped = |
| 116 | | Braceised of |
| 117 | brace_grouped list list * token_extended * token_extended option |
| 118 | | BToken of token_extended |
| 119 | |
| 120 | (* Far better data structure than doing hacks in the lexer or parser |
| 121 | * because in lexer we don't know to which ifdef a endif is related |
| 122 | * and so when we want to comment a ifdef, we don't know which endif |
| 123 | * we must also comment. Especially true for the #if 0 which sometimes |
| 124 | * have a #else part. |
| 125 | * |
| 126 | * x list list, because x list separated by #else or #elif |
| 127 | *) |
| 128 | type ifdef_grouped = |
| 129 | | Ifdef of ifdef_grouped list list * token_extended list |
| 130 | | Ifdefbool of bool * ifdef_grouped list list * token_extended list |
| 131 | | NotIfdefLine of token_extended list |
| 132 | |
| 133 | |
| 134 | type 'a line_grouped = |
| 135 | Line of 'a list |
| 136 | |
| 137 | |
| 138 | type body_function_grouped = |
| 139 | | BodyFunction of token_extended list |
| 140 | | NotBodyLine of token_extended list |
| 141 | |
| 142 | |
| 143 | (* ------------------------------------------------------------------------- *) |
| 144 | (* view builders *) |
| 145 | (* ------------------------------------------------------------------------- *) |
| 146 | |
| 147 | (* todo: synchro ! use more indentation |
| 148 | * if paren not closed and same indentation level, certainly because |
| 149 | * part of a mid-ifdef-expression. |
| 150 | *) |
| 151 | let rec mk_parenthised xs = |
| 152 | let rec loop acc = function |
| 153 | | [] -> acc |
| 154 | | x::xs -> |
| 155 | (match x.tok with |
| 156 | | TOPar _ | TOParDefine _ -> |
| 157 | let body, extras, xs = mk_parameters [x] [] xs in |
| 158 | loop (Parenthised (body,extras)::acc) xs |
| 159 | | _ -> |
| 160 | loop (PToken x::acc) xs |
| 161 | ) in |
| 162 | List.rev(loop [] xs) |
| 163 | |
| 164 | (* return the body of the parenthised expression and the rest of the tokens *) |
| 165 | and mk_parameters extras acc_before_sep xs = |
| 166 | match xs with |
| 167 | | [] -> |
| 168 | (* maybe because of #ifdef which "opens" '(' in 2 branches *) |
| 169 | pr2 "PB: not found closing paren in fuzzy parsing"; |
| 170 | [List.rev acc_before_sep], List.rev extras, [] |
| 171 | | x::xs -> |
| 172 | (match x.tok with |
| 173 | (* synchro *) |
| 174 | | TOBrace _ when x.col =|= 0 -> |
| 175 | pr2 "PB: found synchro point } in paren"; |
| 176 | [List.rev acc_before_sep], List.rev (extras), (x::xs) |
| 177 | |
| 178 | | TCPar _ | TCParEOL _ -> |
| 179 | [List.rev acc_before_sep], List.rev (x::extras), xs |
| 180 | | TOPar _ | TOParDefine _ -> |
| 181 | let body, extrasnest, xs = mk_parameters [x] [] xs in |
| 182 | mk_parameters extras |
| 183 | (Parenthised (body,extrasnest)::acc_before_sep) |
| 184 | xs |
| 185 | | TComma _ -> |
| 186 | let body, extras, xs = mk_parameters (x::extras) [] xs in |
| 187 | (List.rev acc_before_sep)::body, extras, xs |
| 188 | | _ -> |
| 189 | mk_parameters extras (PToken x::acc_before_sep) xs |
| 190 | ) |
| 191 | |
| 192 | |
| 193 | |
| 194 | |
| 195 | let rec mk_braceised xs = |
| 196 | let rec loop acc = function |
| 197 | | [] -> acc |
| 198 | | x::xs -> |
| 199 | (match x.tok with |
| 200 | | TOBrace _ -> |
| 201 | let body, endbrace, xs = mk_braceised_aux [] xs in |
| 202 | loop (Braceised (body, x, endbrace)::acc) xs |
| 203 | | TCBrace _ -> |
| 204 | pr2 "PB: found closing brace alone in fuzzy parsing"; |
| 205 | loop (BToken x::acc) xs |
| 206 | | _ -> |
| 207 | loop (BToken x::acc) xs) in |
| 208 | List.rev(loop [] xs) |
| 209 | |
| 210 | (* return the body of the parenthised expression and the rest of the tokens *) |
| 211 | and mk_braceised_aux acc xs = |
| 212 | match xs with |
| 213 | | [] -> |
| 214 | (* maybe because of #ifdef which "opens" '(' in 2 branches *) |
| 215 | pr2 "PB: not found closing brace in fuzzy parsing"; |
| 216 | [List.rev acc], None, [] |
| 217 | | x::xs -> |
| 218 | (match x.tok with |
| 219 | | TCBrace _ -> [List.rev acc], Some x, xs |
| 220 | | TOBrace _ -> |
| 221 | let body, endbrace, xs = mk_braceised_aux [] xs in |
| 222 | mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs |
| 223 | | _ -> |
| 224 | mk_braceised_aux (BToken x::acc) xs |
| 225 | ) |
| 226 | |
| 227 | |
| 228 | |
| 229 | |
| 230 | let rec mk_ifdef xs = |
| 231 | match xs with |
| 232 | | [] -> [] |
| 233 | | x::xs -> |
| 234 | (match x.tok with |
| 235 | | TIfdef _ -> |
| 236 | let body, extra, xs = mk_ifdef_parameters [x] [] xs in |
| 237 | Ifdef (body, extra)::mk_ifdef xs |
| 238 | | TIfdefBool (b,_, _) -> |
| 239 | let body, extra, xs = mk_ifdef_parameters [x] [] xs in |
| 240 | |
| 241 | (* if not passing, then consider a #if 0 as an ordinary #ifdef *) |
| 242 | if !Flag_parsing_c.if0_passing |
| 243 | then Ifdefbool (b, body, extra)::mk_ifdef xs |
| 244 | else Ifdef(body, extra)::mk_ifdef xs |
| 245 | |
| 246 | | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) -> |
| 247 | let body, extra, xs = mk_ifdef_parameters [x] [] xs in |
| 248 | Ifdefbool (b, body, extra)::mk_ifdef xs |
| 249 | |
| 250 | |
| 251 | | _ -> |
| 252 | (* todo? can have some Ifdef in the line ? *) |
| 253 | let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in |
| 254 | NotIfdefLine line::mk_ifdef xs |
| 255 | ) |
| 256 | |
| 257 | and mk_ifdef_parameters extras acc_before_sep xs = |
| 258 | match xs with |
| 259 | | [] -> |
| 260 | (* Note that mk_ifdef is assuming that CPP instruction are alone |
| 261 | * on their line. Because I do a span (fun x -> is_same_line ...) |
| 262 | * I might take with me a #endif if this one is mixed on a line |
| 263 | * with some "normal" tokens. |
| 264 | *) |
| 265 | pr2 "PB: not found closing ifdef in fuzzy parsing"; |
| 266 | [List.rev acc_before_sep], List.rev extras, [] |
| 267 | | x::xs -> |
| 268 | (match x.tok with |
| 269 | | TEndif _ -> |
| 270 | [List.rev acc_before_sep], List.rev (x::extras), xs |
| 271 | | TIfdef _ -> |
| 272 | let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in |
| 273 | mk_ifdef_parameters |
| 274 | extras (Ifdef (body, extrasnest)::acc_before_sep) xs |
| 275 | |
| 276 | | TIfdefBool (b,_,_) -> |
| 277 | let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in |
| 278 | |
| 279 | if !Flag_parsing_c.if0_passing |
| 280 | then |
| 281 | mk_ifdef_parameters |
| 282 | extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs |
| 283 | else |
| 284 | mk_ifdef_parameters |
| 285 | extras (Ifdef (body, extrasnest)::acc_before_sep) xs |
| 286 | |
| 287 | |
| 288 | | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) -> |
| 289 | let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in |
| 290 | mk_ifdef_parameters |
| 291 | extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs |
| 292 | |
| 293 | | TIfdefelse _ |
| 294 | | TIfdefelif _ -> |
| 295 | let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in |
| 296 | (List.rev acc_before_sep)::body, extras, xs |
| 297 | | _ -> |
| 298 | let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in |
| 299 | mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs |
| 300 | ) |
| 301 | |
| 302 | (* --------------------------------------- *) |
| 303 | |
| 304 | let line_of_paren = function |
| 305 | | PToken x -> x.line |
| 306 | | Parenthised (xxs, info_parens) -> |
| 307 | (match info_parens with |
| 308 | | [] -> raise (Impossible 121) |
| 309 | | x::xs -> x.line |
| 310 | ) |
| 311 | |
| 312 | |
| 313 | let rec span_line_paren line = function |
| 314 | | [] -> [],[] |
| 315 | | x::xs -> |
| 316 | (match x with |
| 317 | | PToken tok when TH.is_eof tok.tok -> |
| 318 | [], x::xs |
| 319 | | _ -> |
| 320 | if line_of_paren x =|= line |
| 321 | then |
| 322 | let (l1, l2) = span_line_paren line xs in |
| 323 | (x::l1, l2) |
| 324 | else ([], x::xs) |
| 325 | ) |
| 326 | |
| 327 | |
| 328 | let rec mk_line_parenthised xs = |
| 329 | match xs with |
| 330 | | [] -> [] |
| 331 | | x::xs -> |
| 332 | let line_no = line_of_paren x in |
| 333 | let line, xs = span_line_paren line_no xs in |
| 334 | Line (x::line)::mk_line_parenthised xs |
| 335 | |
| 336 | |
| 337 | (* --------------------------------------- *) |
| 338 | let rec mk_body_function_grouped xs = |
| 339 | match xs with |
| 340 | | [] -> [] |
| 341 | | x::xs -> |
| 342 | (match x with |
| 343 | | {tok = TOBrace _; col = 0} -> |
| 344 | let is_closing_brace = function |
| 345 | | {tok = TCBrace _; col = 0 } -> true |
| 346 | | _ -> false |
| 347 | in |
| 348 | let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in |
| 349 | (match xs with |
| 350 | | ({tok = TCBrace _; col = 0 })::xs -> |
| 351 | BodyFunction body::mk_body_function_grouped xs |
| 352 | | [] -> |
| 353 | pr2 "PB:not found closing brace in fuzzy parsing"; |
| 354 | [NotBodyLine body] |
| 355 | | _ -> raise (Impossible 122) |
| 356 | ) |
| 357 | |
| 358 | | _ -> |
| 359 | let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in |
| 360 | NotBodyLine line::mk_body_function_grouped xs |
| 361 | ) |
| 362 | |
| 363 | |
| 364 | (* ------------------------------------------------------------------------- *) |
| 365 | (* view iterators *) |
| 366 | (* ------------------------------------------------------------------------- *) |
| 367 | |
| 368 | let rec iter_token_paren f xs = |
| 369 | xs +> List.iter (function |
| 370 | | PToken tok -> f tok; |
| 371 | | Parenthised (xxs, info_parens) -> |
| 372 | info_parens +> List.iter f; |
| 373 | xxs +> List.iter (fun xs -> iter_token_paren f xs) |
| 374 | ) |
| 375 | |
| 376 | let rec iter_token_brace f xs = |
| 377 | xs +> List.iter (function |
| 378 | | BToken tok -> f tok; |
| 379 | | Braceised (xxs, tok1, tok2opt) -> |
| 380 | f tok1; do_option f tok2opt; |
| 381 | xxs +> List.iter (fun xs -> iter_token_brace f xs) |
| 382 | ) |
| 383 | |
| 384 | let rec iter_token_ifdef f xs = |
| 385 | xs +> List.iter (function |
| 386 | | NotIfdefLine xs -> xs +> List.iter f; |
| 387 | | Ifdefbool (_, xxs, info_ifdef) |
| 388 | | Ifdef (xxs, info_ifdef) -> |
| 389 | info_ifdef +> List.iter f; |
| 390 | xxs +> List.iter (iter_token_ifdef f) |
| 391 | ) |
| 392 | |
| 393 | |
| 394 | |
| 395 | |
| 396 | let tokens_of_paren xs = |
| 397 | let g = ref [] in |
| 398 | xs +> iter_token_paren (fun tok -> push2 tok g); |
| 399 | List.rev !g |
| 400 | |
| 401 | |
| 402 | let tokens_of_paren_ordered xs = |
| 403 | let g = ref [] in |
| 404 | |
| 405 | let rec aux_tokens_ordered = function |
| 406 | | PToken tok -> push2 tok g; |
| 407 | | Parenthised (xxs, info_parens) -> |
| 408 | let (opar, cpar, commas) = |
| 409 | match info_parens with |
| 410 | | opar::xs -> |
| 411 | (match List.rev xs with |
| 412 | | cpar::xs -> |
| 413 | opar, cpar, List.rev xs |
| 414 | | _ -> raise (Impossible 123) |
| 415 | ) |
| 416 | | _ -> raise (Impossible 124) |
| 417 | in |
| 418 | push2 opar g; |
| 419 | aux_args (xxs,commas); |
| 420 | push2 cpar g; |
| 421 | |
| 422 | and aux_args (xxs, commas) = |
| 423 | match xxs, commas with |
| 424 | | [], [] -> () |
| 425 | | [xs], [] -> xs +> List.iter aux_tokens_ordered |
| 426 | | xs::ys::xxs, comma::commas -> |
| 427 | xs +> List.iter aux_tokens_ordered; |
| 428 | push2 comma g; |
| 429 | aux_args (ys::xxs, commas) |
| 430 | | _ -> raise (Impossible 125) |
| 431 | |
| 432 | in |
| 433 | |
| 434 | xs +> List.iter aux_tokens_ordered; |
| 435 | List.rev !g |
| 436 | |
| 437 | |
| 438 | |
| 439 | (* ------------------------------------------------------------------------- *) |
| 440 | (* set the context info in token *) |
| 441 | (* ------------------------------------------------------------------------- *) |
| 442 | |
| 443 | |
| 444 | let rec set_in_function_tag xs = |
| 445 | (* could try: ) { } but it can be the ) of a if or while, so |
| 446 | * better to base the heuristic on the position in column zero. |
| 447 | * Note that some struct or enum or init put also their { in first column |
| 448 | * but set_in_other will overwrite the previous InFunction tag. |
| 449 | *) |
| 450 | match xs with |
| 451 | | [] -> () |
| 452 | (* ) { and the closing } is in column zero, then certainly a function *) |
| 453 | | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs |
| 454 | when tok1.col <> 0 && tok2.col =|= 0 -> |
| 455 | body +> List.iter (iter_token_brace (fun tok -> |
| 456 | tok.where <- InFunction |
| 457 | )); |
| 458 | set_in_function_tag xs |
| 459 | |
| 460 | | (BToken x)::xs -> set_in_function_tag xs |
| 461 | |
| 462 | | (Braceised (body, tok1, Some tok2))::xs |
| 463 | when tok1.col =|= 0 && tok2.col =|= 0 -> |
| 464 | body +> List.iter (iter_token_brace (fun tok -> |
| 465 | tok.where <- InFunction |
| 466 | )); |
| 467 | set_in_function_tag xs |
| 468 | | Braceised (body, tok1, tok2)::xs -> |
| 469 | set_in_function_tag xs |
| 470 | |
| 471 | |
| 472 | let rec set_in_other xs = |
| 473 | match xs with |
| 474 | | [] -> () |
| 475 | (* enum x { } *) |
| 476 | | BToken ({tok = Tenum _})::BToken ({tok = TIdent _}) |
| 477 | ::Braceised(body, tok1, tok2)::xs |
| 478 | | BToken ({tok = Tenum _}) |
| 479 | ::Braceised(body, tok1, tok2)::xs |
| 480 | -> |
| 481 | body +> List.iter (iter_token_brace (fun tok -> |
| 482 | tok.where <- InEnum; |
| 483 | )); |
| 484 | set_in_other xs |
| 485 | |
| 486 | (* struct x { } *) |
| 487 | | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _}) |
| 488 | ::Braceised(body, tok1, tok2)::xs -> |
| 489 | body +> List.iter (iter_token_brace (fun tok -> |
| 490 | tok.where <- InStruct; |
| 491 | )); |
| 492 | set_in_other xs |
| 493 | (* = { } *) |
| 494 | | BToken ({tok = TEq _}) |
| 495 | ::Braceised(body, tok1, tok2)::xs -> |
| 496 | body +> List.iter (iter_token_brace (fun tok -> |
| 497 | tok.where <- InInitializer; |
| 498 | )); |
| 499 | set_in_other xs |
| 500 | |
| 501 | | BToken _::xs -> set_in_other xs |
| 502 | |
| 503 | | Braceised(body, tok1, tok2)::xs -> |
| 504 | body +> List.iter set_in_other; |
| 505 | set_in_other xs |
| 506 | |
| 507 | |
| 508 | |
| 509 | |
| 510 | let set_context_tag xs = |
| 511 | begin |
| 512 | set_in_function_tag xs; |
| 513 | set_in_other xs; |
| 514 | end |
| 515 | |