Commit | Line | Data |
---|---|---|
708f4980 | 1 | (* Yoann Padioleau |
ae4735db C |
2 | * |
3 | * Copyright (C) 2010, University of Copenhagen DIKU and INRIA. | |
708f4980 C |
4 | * Copyright (C) 2007, 2008 Ecole des Mines de Nantes |
5 | * | |
6 | * This program is free software; you can redistribute it and/or | |
7 | * modify it under the terms of the GNU General Public License (GPL) | |
8 | * version 2 as published by the Free Software Foundation. | |
ae4735db | 9 | * |
708f4980 C |
10 | * This program is distributed in the hope that it will be useful, |
11 | * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 | * file license.txt for more details. | |
14 | *) | |
15 | ||
16 | open Common | |
17 | ||
ae4735db | 18 | module TH = Token_helpers |
708f4980 | 19 | |
ae4735db | 20 | open Parser_c |
708f4980 C |
21 | |
22 | (*****************************************************************************) | |
23 | (* Some debugging functions *) | |
24 | (*****************************************************************************) | |
25 | ||
(* Logging helpers for this module, built by Common.mk_pr2_wrappers from
 * the Flag_parsing_c.verbose_parsing flag. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
708f4980 C |
27 | |
28 | (* ------------------------------------------------------------------------- *) | |
29 | (* fuzzy parsing, different "views" over the same program *) | |
30 | (* ------------------------------------------------------------------------- *) | |
31 | ||
32 | ||
33 | (* Normally I should not use ref/mutable in the token_extended type | |
34 | * and I should have a set of functions taking a list of tokens and | |
35 | * returning a list of tokens. The problem is that to make easier some | |
36 | * functions, it is better to work on better representation, on "views" | |
37 | * over this list of tokens. But then modifying those views and get | |
38 | * back from those views to the original simple list of tokens is | |
39 | * tedious. One way is to maintain next to the view a list of "actions" | |
40 | * (I was using a hash storing the charpos of the token and associating | |
41 | * the action) but it is tedious too. Simpler to use mutable/ref. We | |
42 | * use the same idea that we use when working on the Ast_c. *) | |
43 | ||
44 | (* old: when I was using the list of "actions" next to the views, the hash | |
45 | * indexed by the charpos, there could have been some problems: | |
46 | * how my fake_pos interact with the way I tag and adjust token ? | |
47 | * because I base my tagging on the position of the token ! so sometimes | |
ae4735db | 48 | * could tag another fakeInfo that should not be tagged ? |
708f4980 C |
49 | * fortunately I don't use anymore this technique. |
50 | *) | |
51 | ||
52 | (* update: quite close to the Place_c.Inxxx *) | |
(* Syntactic context recorded in the where field of a token_extended.
 * NoContext is the value given to freshly built tokens by
 * mk_token_extended; the other values are assigned by
 * set_in_function_tag and set_in_other. *)
type context =
  | InFunction
  | InEnum
  | InStruct
  | InInitializer
  | NoContext
55 | ||
(* A parser token together with the mutable annotations used by the
 * different views built in this module. *)
type token_extended = {
  (* the underlying token; set_as_comment replaces it in place *)
  mutable tok: Parser_c.token;

  (* context tag, NoContext until one of the set_in_xxx passes runs *)
  mutable where: context;

  (* tokens to emit just before tok when the list is rebuilt by
   * rebuild_tokens_extented.
   * less: need also a after ? *)
  mutable new_tokens_before : Parser_c.token list;

  (* line x col cache, more easily accessible, of the info in the token *)
  line: int;
  col : int;
}
67 | ||
68 | (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then | |
69 | * can confuse the parser. | |
70 | *) | |
ae4735db C |
(* Turn the token held by x into a TCommentCpp of kind cppkind, reusing
 * the info of the token it replaces. The EOF token is left untouched,
 * otherwise parse_c will be lost if it does not find an EOF token. *)
let set_as_comment cppkind x =
  if not (TH.is_eof x.tok)
  then x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
76 | ||
(* Wrap a plain token: cache its line and column as given by
 * TH.linecol_of_tok, start with no context and no pending tokens. *)
let mk_token_extended x =
  let position = TH.linecol_of_tok x in
  { tok = x;
    where = NoContext;
    new_tokens_before = [];
    line = fst position;
    col = snd position;
  }
84 | ||
85 | ||
(* Flatten the extended tokens back to a plain token sequence, each token
 * being preceded by its new_tokens_before, and wrap every resulting token
 * again with mk_token_extended (so the context tags are reset). *)
let rebuild_tokens_extented toks_ext =
  (* build the flat list in reverse: pending tokens first, then the token *)
  let rev_flat =
    List.fold_left
      (fun acc ext -> ext.tok :: List.rev_append ext.new_tokens_before acc)
      [] toks_ext
  in
  acc_map mk_token_extended (List.rev rev_flat)
94 | ||
95 | ||
96 | ||
ae4735db C |
97 | (* x list list, because x list separated by ',' *) |
(* View of the tokens grouped by parentheses.
 *
 * Parenthised (args, delims):
 *  - args is the content split at top-level commas, one list per piece;
 *  - delims holds, in source order, the opening paren, the commas and the
 *    closing paren (the closing paren is missing when mk_parameters did
 *    not find it).
 *)
type paren_grouped =
  | Parenthised of
      paren_grouped list list * token_extended list
  | PToken of
      token_extended
101 | ||
ae4735db C |
(* View of the tokens grouped by braces.
 *
 * Braceised (body, obrace, cbrace_opt): body is the content between the
 * braces, obrace the opening brace and cbrace_opt the closing brace, which
 * is None when mk_braceised_aux reached the end of the tokens first.
 *)
type brace_grouped =
  | Braceised of
      brace_grouped list list * token_extended * token_extended option
  | BToken of
      token_extended
106 | ||
107 | (* Far better data structure than doing hacks in the lexer or parser | |
108 | * because in lexer we don't know to which ifdef a endif is related | |
109 | * and so when we want to comment a ifdef, we don't know which endif | |
110 | * we must also comment. Especially true for the #if 0 which sometimes | |
111 | * have a #else part. | |
ae4735db C |
112 | * |
113 | * x list list, because x list separated by #else or #elif | |
114 | *) | |
(* View of the tokens grouped by cpp conditionals.
 *
 * In Ifdef and Ifdefbool the first list holds the branches and the token
 * list holds the directives themselves in source order (the opening
 * directive, the else/elif ones, then the endif when it was found).
 * The boolean of Ifdefbool is the one carried by the opening token.
 * NotIfdefLine holds the tokens of one ordinary line.
 *)
type ifdef_grouped =
  | Ifdef of
      ifdef_grouped list list * token_extended list
  | Ifdefbool of
      bool * ifdef_grouped list list * token_extended list
  | NotIfdefLine of
      token_extended list
119 | ||
120 | ||
(* The elements that sit on one source line, as built by
 * mk_line_parenthised. *)
type 'a line_grouped =
  | Line of 'a list
123 | ||
124 | ||
(* View built by mk_body_function_grouped: BodyFunction holds the tokens
 * found between a column-zero opening brace and the next column-zero
 * closing brace, NotBodyLine the tokens of a line outside such a body. *)
type body_function_grouped =
  | BodyFunction of
      token_extended list
  | NotBodyLine of
      token_extended list
128 | ||
129 | ||
130 | (* ------------------------------------------------------------------------- *) | |
131 | (* view builders *) | |
132 | (* ------------------------------------------------------------------------- *) | |
133 | ||
ae4735db | 134 | (* todo: synchro ! use more indentation |
708f4980 C |
135 | * if paren not closed and same indentation level, certainly because |
136 | * part of a mid-ifdef-expression. | |
137 | *) | |
(* Build the paren_grouped view of a token list. Each opening paren
 * (TOPar or TOParDefine) starts a Parenthised node whose content is
 * collected by mk_parameters; any other token becomes a PToken. *)
let rec mk_parenthised xs =
  let rec loop acc toks =
    match toks with
    | [] -> acc
    | tok::rest ->
        (match tok.tok with
        | TOPar _ | TOParDefine _ ->
            let args, delims, remaining = mk_parameters [tok] [] rest in
            loop (Parenthised (args, delims)::acc) remaining
        | _ ->
            loop (PToken tok::acc) rest
        )
  in
  List.rev (loop [] xs)

(* Collect the content of a parenthesised group.
 *
 * extras is the reversed list of delimiter tokens seen so far (opening
 * paren and commas), acc_before_sep the reversed content of the current
 * comma-separated piece.
 *
 * Return the pieces of the group, the delimiters in source order and the
 * tokens remaining after the group. *)
and mk_parameters extras acc_before_sep xs =
  match xs with
  | [] ->
      (* maybe because of #ifdef which "opens" '(' in 2 branches *)
      pr2 "PB: not found closing paren in fuzzy parsing";
      [List.rev acc_before_sep], List.rev extras, []
  | tok::rest ->
      (match tok.tok with
      (* synchro: a brace in column zero ends the group; the brace itself
       * is given back to the caller with the remaining tokens *)
      | TOBrace _ when tok.col =|= 0 ->
          pr2 "PB: found synchro point } in paren";
          [List.rev acc_before_sep], List.rev extras, tok::rest

      | TCPar _ | TCParEOL _ ->
          [List.rev acc_before_sep], List.rev (tok::extras), rest

      | TOPar _ | TOParDefine _ ->
          (* nested group: build it, then go on with the current piece *)
          let inner_args, inner_delims, after =
            mk_parameters [tok] [] rest in
          let nested = Parenthised (inner_args, inner_delims) in
          mk_parameters extras (nested::acc_before_sep) after

      | TComma _ ->
          (* close the current piece and start a new one *)
          let later_args, delims, after =
            mk_parameters (tok::extras) [] rest in
          (List.rev acc_before_sep)::later_args, delims, after

      | _ ->
          mk_parameters extras (PToken tok::acc_before_sep) rest
      )
178 | ||
179 | ||
180 | ||
181 | ||
(* Build the brace_grouped view of a token list. Each opening brace starts
 * a Braceised node whose content is collected by mk_braceised_aux; a
 * closing brace met outside of any group is reported and kept as a plain
 * BToken, like any other token. *)
let rec mk_braceised xs =
  let rec loop acc toks =
    match toks with
    | [] -> acc
    | tok::rest ->
        (match tok.tok with
        | TOBrace _ ->
            let body, cbrace_opt, remaining = mk_braceised_aux [] rest in
            loop (Braceised (body, tok, cbrace_opt)::acc) remaining
        | other ->
            (match other with
            | TCBrace _ ->
                pr2 "PB: found closing brace alone in fuzzy parsing"
            | _ -> ()
            );
            loop (BToken tok::acc) rest
        )
  in
  List.rev (loop [] xs)

(* Collect the content of a braced group; acc is the reversed content seen
 * so far. Return the body of the braced group (as a one-element list),
 * the closing brace if it was found, and the rest of the tokens. *)
and mk_braceised_aux acc xs =
  match xs with
  | [] ->
      (* maybe because of #ifdef which "opens" a brace in 2 branches *)
      pr2 "PB: not found closing brace in fuzzy parsing";
      [List.rev acc], None, []
  | tok::rest ->
      (match tok.tok with
      | TCBrace _ ->
          [List.rev acc], Some tok, rest
      | TOBrace _ ->
          (* nested group: build it, then go on with the current body *)
          let inner_body, inner_cbrace, after = mk_braceised_aux [] rest in
          let nested = Braceised (inner_body, tok, inner_cbrace) in
          mk_braceised_aux (nested::acc) after
      | _ ->
          mk_braceised_aux (BToken tok::acc) rest
      )
213 | ||
708f4980 C |
214 | |
215 | ||
ae4735db C |
216 | |
(* Build the ifdef_grouped view of a token list.
 *
 * Each opening cpp conditional starts an Ifdef or Ifdefbool node whose
 * branches are collected by mk_ifdef_parameters; the other tokens are
 * grouped line by line in NotIfdefLine nodes.
 *
 * The top-level walk uses an accumulator, like mk_parenthised and
 * mk_braceised, so that the stack depth does not grow with the number of
 * lines of the file. *)
let rec mk_ifdef xs =
  let rec loop acc = function
    | [] -> acc
    | x::xs ->
        (match x.tok with
        | TIfdef _ ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            loop (Ifdef (body, extra)::acc) xs

        | TIfdefBool (b,_,_) ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
            let group =
              if !Flag_parsing_c.if0_passing
              then Ifdefbool (b, body, extra)
              else Ifdef (body, extra)
            in
            loop (group::acc) xs

        | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            loop (Ifdefbool (b, body, extra)::acc) xs

        | _ ->
            (* todo? can have some Ifdef in the line ? *)
            let line, xs =
              Common.span (fun y -> y.line =|= x.line) (x::xs) in
            loop (NotIfdefLine line::acc) xs
        )
  in
  List.rev (loop [] xs)

(* Collect the branches of a cpp conditional.
 *
 * extras is the reversed list of directive tokens seen so far (the
 * opening directive and the else/elif ones), acc_before_sep the reversed
 * content of the current branch.
 *
 * Return the branches, the directive tokens in source order (ending with
 * the endif when it was found) and the tokens remaining after the
 * conditional. *)
and mk_ifdef_parameters extras acc_before_sep xs =
  match xs with
  | [] ->
      (* Note that mk_ifdef is assuming that CPP instruction are alone
       * on their line. Because I do a span (fun x -> is_same_line ...)
       * I might take with me a #endif if this one is mixed on a line
       * with some "normal" tokens.
       *)
      pr2 "PB: not found closing ifdef in fuzzy parsing";
      [List.rev acc_before_sep], List.rev extras, []
  | x::xs ->
      (match x.tok with
      | TEndif _ ->
          [List.rev acc_before_sep], List.rev (x::extras), xs

      | TIfdef _ ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          mk_ifdef_parameters
            extras (Ifdef (body, extrasnest)::acc_before_sep) xs

      | TIfdefBool (b,_,_) ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          (* same rule as in mk_ifdef for the #if 0 when not passing *)
          let nested =
            if !Flag_parsing_c.if0_passing
            then Ifdefbool (b, body, extrasnest)
            else Ifdef (body, extrasnest)
          in
          mk_ifdef_parameters extras (nested::acc_before_sep) xs

      | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          mk_ifdef_parameters
            extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs

      | TIfdefelse _
      | TIfdefelif _ ->
          (* close the current branch and start a new one *)
          let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
          (List.rev acc_before_sep)::body, extras, xs

      | _ ->
          let line, xs =
            Common.span (fun y -> y.line =|= x.line) (x::xs) in
          mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
      )
288 | ||
289 | (* --------------------------------------- *) | |
290 | ||
(* Line of a paren_grouped element: the line of the token itself for a
 * PToken, the line of the first delimiter for a Parenthised.
 * Raise Impossible when a Parenthised carries no delimiter at all. *)
let line_of_paren = function
  | PToken tok -> tok.line
  | Parenthised (_, []) -> raise Impossible
  | Parenthised (_, first_delim::_) -> first_delim.line
298 | ||
299 | ||
(* Split a paren_grouped list in two: the longest prefix of elements whose
 * line_of_paren is line, and the remaining elements. The prefix also
 * stops in front of an EOF token, whatever its line. *)
let span_line_paren line xs =
  let rec collect seen = function
    | [] ->
        List.rev seen, []
    | (PToken tok :: _) as remaining when TH.is_eof tok.tok ->
        List.rev seen, remaining
    | (x :: tail) as remaining ->
        if line_of_paren x =|= line
        then collect (x::seen) tail
        else List.rev seen, remaining
  in
  collect [] xs
708f4980 | 313 | |
ae4735db C |
314 | |
(* Group a paren_grouped list line by line: each Line starts with the
 * first element not yet consumed and is completed with the following
 * elements that span_line_paren finds on the same line. *)
let mk_line_parenthised xs =
  let rec loop acc = function
    | [] -> List.rev acc
    | first::rest ->
        let same_line, remaining =
          span_line_paren (line_of_paren first) rest in
        loop (Line (first::same_line)::acc) remaining
  in
  loop [] xs
322 | ||
323 | ||
324 | (* --------------------------------------- *) | |
ae4735db C |
(* Build the body_function_grouped view of a token list.
 *
 * An opening brace in column zero starts a body that runs up to the next
 * closing brace in column zero; the tokens in between form a BodyFunction
 * (the two braces themselves are not part of it). When no such closing
 * brace exists, the tokens that follow the opening brace are returned as
 * a final NotBodyLine. The other tokens are grouped line by line in
 * NotBodyLine nodes. *)
let mk_body_function_grouped xs =
  let is_body_end t =
    match t.tok with
    | TCBrace _ -> t.col =|= 0
    | _ -> false
  in
  let rec loop acc = function
    | [] -> List.rev acc
    | x::rest ->
        (match x.tok with
        | TOBrace _ when x.col =|= 0 ->
            let body, after =
              Common.span (fun t -> not (is_body_end t)) rest in
            (match after with
            | [] ->
                pr2 "PB:not found closing brace in fuzzy parsing";
                List.rev (NotBodyLine body::acc)
            | closing::after when is_body_end closing ->
                loop (BodyFunction body::acc) after
            | _ -> raise Impossible
            )
        | _ ->
            let line, after =
              Common.span (fun y -> y.line =|= x.line) (x::rest) in
            loop (NotBodyLine line::acc) after
        )
  in
  loop [] xs
349 | ||
350 | ||
351 | (* ------------------------------------------------------------------------- *) | |
352 | (* view iterators *) | |
353 | (* ------------------------------------------------------------------------- *) | |
354 | ||
(* Apply f to every token of a paren_grouped list. For a Parenthised node
 * the delimiters are visited first, then the content, so the visit order
 * is not the source order (see tokens_of_paren_ordered for that). *)
let rec iter_token_paren f xs =
  let visit = function
    | PToken tok -> f tok
    | Parenthised (args, delims) ->
        List.iter f delims;
        List.iter (iter_token_paren f) args
  in
  List.iter visit xs
362 | ||
(* Apply f to every token of a brace_grouped list. For a Braceised node
 * the opening brace and the closing brace (when present) are visited
 * first, then the content. *)
let rec iter_token_brace f xs =
  let visit = function
    | BToken tok -> f tok
    | Braceised (bodies, obrace, cbrace_opt) ->
        f obrace;
        do_option f cbrace_opt;
        List.iter (iter_token_brace f) bodies
  in
  List.iter visit xs
370 | ||
(* Apply f to every token of an ifdef_grouped list. For an Ifdef or
 * Ifdefbool node the directive tokens are visited first, then the
 * branches. *)
let rec iter_token_ifdef f xs =
  let visit = function
    | NotIfdefLine toks -> List.iter f toks
    | Ifdefbool (_, branches, directives)
    | Ifdef (branches, directives) ->
        List.iter f directives;
        List.iter (iter_token_ifdef f) branches
  in
  List.iter visit xs
379 | ||
380 | ||
381 | ||
382 | ||
(* All the tokens of a paren_grouped list, in the visit order of
 * iter_token_paren (delimiters of a group before its content). *)
let tokens_of_paren xs =
  let collected = ref [] in
  iter_token_paren (fun tok -> push2 tok collected) xs;
  List.rev !collected
387 | ||
388 | ||
(* All the tokens of a paren_grouped list, in source order: for each
 * Parenthised node the opening paren, then the pieces interleaved with
 * their commas, then the closing paren.
 *
 * mk_parameters returns a delimiter list without closing paren when the
 * group is not closed (end of the tokens, or synchro point). Such a group
 * is accepted here: its tokens are emitted in order, without closing
 * paren. The closing paren is therefore recognised by its token kind and
 * not by its position in the delimiter list.
 *
 * Raise Impossible when a Parenthised node has no delimiter at all, or
 * when the number of commas does not match the number of pieces. *)
let tokens_of_paren_ordered xs =
  let g = ref [] in

  let is_closing_paren t =
    match t.tok with
    | TCPar _ | TCParEOL _ -> true
    | _ -> false
  in

  let rec aux_tokens_ordered = function
    | PToken tok -> push2 tok g
    | Parenthised (xxs, info_parens) ->
        let (opar, commas, cpar_opt) =
          match info_parens with
          | [] -> raise Impossible
          | opar::rest ->
              (match List.rev rest with
              | last::rev_commas when is_closing_paren last ->
                  opar, List.rev rev_commas, Some last
              | _ ->
                  (* unclosed group: every remaining delimiter is a comma *)
                  opar, rest, None
              )
        in
        push2 opar g;
        aux_args (xxs, commas);
        (match cpar_opt with
        | Some cpar -> push2 cpar g
        | None -> ()
        )

  and aux_args (xxs, commas) =
    match xxs, commas with
    | [], [] -> ()
    | [xs], [] -> List.iter aux_tokens_ordered xs
    | xs::ys::xxs, comma::commas ->
        List.iter aux_tokens_ordered xs;
        push2 comma g;
        aux_args (ys::xxs, commas)
    | _ -> raise Impossible

  in

  List.iter aux_tokens_ordered xs;
  List.rev !g
423 | ||
424 | ||
425 | ||
426 | (* ------------------------------------------------------------------------- *) | |
427 | (* set the context info in token *) | |
428 | (* ------------------------------------------------------------------------- *) | |
429 | ||
430 | ||
ae4735db C |
(* Tag as InFunction the tokens found inside the top-level braced groups
 * that look like function bodies. Only the content of the group is
 * tagged, not its two braces, and only the top level of xs is scanned.
 *
 * could try: ) { } but it can be the ) of a if or while, so
 * better to base the heuristic on the position in column zero.
 * Note that some struct or enum or init put also their { in first column
 * but set_in_other will overwrite the previous InFunction tag.
 *)
let rec set_in_function_tag xs =
  let mark_as_function body =
    List.iter (iter_token_brace (fun t -> t.where <- InFunction)) body
  in
  match xs with
  | [] -> ()

  (* ) { and the closing } is in column zero, then certainly a function *)
  | BToken ({tok = TCPar _})::(Braceised (body, obrace, Some cbrace))::rest
      when obrace.col <> 0 && cbrace.col =|= 0 ->
      mark_as_function body;
      set_in_function_tag rest

  | (BToken _)::rest ->
      set_in_function_tag rest

  (* both braces in column zero *)
  | (Braceised (body, obrace, Some cbrace))::rest
      when obrace.col =|= 0 && cbrace.col =|= 0 ->
      mark_as_function body;
      set_in_function_tag rest

  | (Braceised _)::rest ->
      set_in_function_tag rest
708f4980 | 457 | |
ae4735db C |
458 | |
(* Tag the tokens found inside the braced groups of an enum, of a named
 * struct or of an initializer with InEnum, InStruct or InInitializer.
 * Every token under such a group gets the tag, nested groups included.
 * The other braced groups are searched recursively. *)
let rec set_in_other xs =
  let mark ctx body =
    List.iter (iter_token_brace (fun t -> t.where <- ctx)) body
  in
  match xs with
  | [] -> ()

  (* enum x { } and enum { } *)
  | BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
    ::Braceised (body, _, _)::rest
  | BToken ({tok = Tenum _})
    ::Braceised (body, _, _)::rest ->
      mark InEnum body;
      set_in_other rest

  (* struct x { } *)
  | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
    ::Braceised (body, _, _)::rest ->
      mark InStruct body;
      set_in_other rest

  (* = { } *)
  | BToken ({tok = TEq _})
    ::Braceised (body, _, _)::rest ->
      mark InInitializer body;
      set_in_other rest

  | (BToken _)::rest ->
      set_in_other rest

  (* any other braced group: look inside it *)
  | Braceised (body, _, _)::rest ->
      List.iter set_in_other body;
      set_in_other rest
493 | ||
708f4980 | 494 | |
ae4735db C |
495 | |
496 | ||
(* Set the where field of the tokens of a brace_grouped list. The function
 * pass runs first, so that the tags set by set_in_other win over the
 * InFunction ones. *)
let set_context_tag xs =
  set_in_function_tag xs;
  set_in_other xs
ae4735db | 502 |