coccinelle release 0.2.5
[bpt/coccinelle.git] / parsing_c / token_views_c.ml
CommitLineData
708f4980 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
708f4980
C
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
708f4980
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
708f4980 19
ae4735db 20open Parser_c
708f4980
C
21
22(*****************************************************************************)
23(* Some debugging functions *)
24(*****************************************************************************)
25
(* Logging helpers for this module, built by Common.mk_pr2_wrappers from
 * the Flag_parsing_c.verbose_parsing flag. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
708f4980
C
27
28(* ------------------------------------------------------------------------- *)
29(* fuzzy parsing, different "views" over the same program *)
30(* ------------------------------------------------------------------------- *)
31
32
33(* Normally I should not use ref/mutable in the token_extended type
34 * and I should have a set of functions taking a list of tokens and
35 * returning a list of tokens. The problem is that to make easier some
36 * functions, it is better to work on better representation, on "views"
37 * over this list of tokens. But then modifying those views and get
38 * back from those views to the original simple list of tokens is
39 * tedious. One way is to maintain next to the view a list of "actions"
40 * (I was using a hash storing the charpos of the token and associating
41 * the action) but it is tedious too. Simpler to use mutable/ref. We
42 * use the same idea that we use when working on the Ast_c. *)
43
(* old: when I was using the list of "actions" next to the views, with the
 * hash indexed by the charpos, there could have been some problems:
 * how does my fake_pos interact with the way I tag and adjust tokens?
 * Because I based my tagging on the position of the token, I could
 * sometimes tag another fakeInfo that should not be tagged.
 * Fortunately I don't use this technique anymore.
 *)
51
52(* update: quite close to the Place_c.Inxxx *)
(* Syntactic region a token is believed to be in. Tokens start as
 * NoContext (see mk_token_extended); the other values are assigned by
 * the set_in_function_tag / set_in_other heuristics below. *)
type context =
  | InFunction
  | InEnum
  | InStruct
  | InInitializer
  | NoContext
55
(* A lexer token plus the mutable annotations used by the token views. *)
type token_extended = {
  (* the wrapped token; mutable so that a pass can rewrite it in place
   * (e.g. set_as_comment) *)
  mutable tok: Parser_c.token;
  (* context tag, NoContext until one of the set_in_xxx passes runs *)
  mutable where: context;

  (* tokens to insert just before this one when the token list is
   * rebuilt by rebuild_tokens_extented.
   * less: need also a after ? *)
  mutable new_tokens_before : Parser_c.token list;

  (* line x col cache, more easily accessible, of the info in the token *)
  line: int;
  col : int;
}
67
68(* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
69 * can confuse the parser.
70 *)
ae4735db
C
(* Turn the token held by [x] into a TCommentCpp of kind [cppkind],
 * keeping the info of the original token. An EOF token is left
 * untouched, otherwise parse_c will be lost if it doesn't find an EOF
 * token. *)
let set_as_comment cppkind x =
  if not (TH.is_eof x.tok)
  then x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
76
(* Wrap the token [x] into a fresh token_extended: no context, nothing to
 * insert before it, and the line/column cache filled from
 * TH.linecol_of_tok. *)
let mk_token_extended x =
  let position = TH.linecol_of_tok x in
  {
    tok = x;
    where = NoContext;
    new_tokens_before = [];
    line = fst position;
    col = snd position;
  }
84
85
(* Flatten [toks_ext] back into a plain token sequence, emitting for each
 * element its new_tokens_before (in order) followed by its own token,
 * then re-wrap every resulting token with mk_token_extended. The
 * context tags of the input are therefore not carried over. *)
let rebuild_tokens_extented toks_ext =
  (* accumulate in reverse: rev_append pushes the "before" tokens one by
   * one, exactly as successive pushes on a stack would *)
  let rev_tokens =
    List.fold_left
      (fun acc tok_ext ->
        tok_ext.tok :: List.rev_append tok_ext.new_tokens_before acc)
      [] toks_ext
  in
  let tokens = List.rev rev_tokens in
  acc_map mk_token_extended tokens
94
95
96
ae4735db
C
97(* x list list, because x list separated by ',' *)
(* View of the tokens grouped by parentheses.
 *
 * Parenthised (args, parens):
 *  - args: the content between the parentheses, one inner list per
 *    comma-separated part;
 *  - parens: the delimiter tokens as collected by mk_parameters, i.e. the
 *    opening paren, then the commas, then the closing paren (absent when
 *    no closing paren was found).
 * PToken: any token outside of a parenthesised group. *)
type paren_grouped =
  | Parenthised of
      paren_grouped list list * token_extended list
  | PToken of
      token_extended
101
ae4735db
C
(* View of the tokens grouped by braces.
 *
 * Braceised (body, open_brace, close_brace_opt): close_brace_opt is None
 * when mk_braceised_aux reached the end of the tokens without finding the
 * matching closing brace.
 * BToken: any token outside of a braced group. *)
type brace_grouped =
  | Braceised of
      brace_grouped list list * token_extended * token_extended option
  | BToken of
      token_extended
106
(* Far better data structure than doing hacks in the lexer or parser,
 * because in the lexer we don't know to which #ifdef a #endif is related,
 * so when we want to comment out an #ifdef we don't know which #endif
 * we must also comment out. This is especially true for #if 0, which
 * sometimes has a #else part.
 *
 * x list list, because x list separated by #else or #elif
 *)
(* View of the tokens grouped by cpp conditionals.
 *
 * Ifdef (branches, directives): one inner list per branch; directives
 *   holds the conditional's own tokens as collected by
 *   mk_ifdef_parameters (the opening directive, any #else/#elif, and
 *   the #endif when one was found).
 * Ifdefbool (b, branches, directives): same, for a conditional whose
 *   opening token carries a boolean [b].
 * NotIfdefLine: a run of tokens sharing the same line and not starting
 *   with a conditional directive. *)
type ifdef_grouped =
  | Ifdef of
      ifdef_grouped list list * token_extended list
  | Ifdefbool of
      bool * ifdef_grouped list list * token_extended list
  | NotIfdefLine of
      token_extended list
119
120
(* The elements found on one source line (see mk_line_parenthised). *)
type 'a line_grouped =
  | Line of 'a list
123
124
(* View splitting the tokens between column-zero brace bodies and the
 * rest (see mk_body_function_grouped).
 *
 * BodyFunction: the tokens strictly between a '{' and a '}' that are
 *   both in column zero.
 * NotBodyLine: any other run of tokens. *)
type body_function_grouped =
  | BodyFunction of
      token_extended list
  | NotBodyLine of
      token_extended list
128
129
130(* ------------------------------------------------------------------------- *)
131(* view builders *)
132(* ------------------------------------------------------------------------- *)
133
ae4735db 134(* todo: synchro ! use more indentation
708f4980
C
135 * if paren not closed and same indentation level, certainly because
136 * part of a mid-ifdef-expression.
137*)
(* Build the parenthesis view of the token list [xs]: every opening paren
 * starts a Parenthised group (built by mk_parameters), every other token
 * becomes a PToken. The traversal accumulates in reverse and is
 * tail-recursive at the top level. *)
let rec mk_parenthised xs =
  let rec group_all rev_groups toks =
    match toks with
    | [] -> List.rev rev_groups
    | tok::rest ->
        (match tok.tok with
        | TOPar _ | TOParDefine _ ->
            let args, parens, rest = mk_parameters [tok] [] rest in
            group_all (Parenthised (args, parens)::rev_groups) rest
        | _ ->
            group_all (PToken tok::rev_groups) rest
        )
  in
  group_all [] xs

(* Parse the inside of a parenthesised group.
 *
 * [extras] is the reversed list of delimiter tokens seen so far (opening
 * paren, commas), [acc_before_sep] the reversed content of the argument
 * being read, [xs] the remaining tokens.
 *
 * Return (arguments, delimiter tokens in source order, remaining tokens).
 *)
and mk_parameters extras acc_before_sep xs =
  match xs with
  | [] ->
      (* maybe because of #ifdef which "opens" '(' in 2 branches *)
      pr2 "PB: not found closing paren in fuzzy parsing";
      [List.rev acc_before_sep], List.rev extras, []
  | tok::rest ->
      (match tok.tok with
      (* synchro: an opening brace in column zero ends the group, and the
       * brace itself is given back to the caller.
       * NOTE(review): the message below mentions '}' while the pattern
       * matches an opening brace; kept as is. *)
      | TOBrace _ when tok.col =|= 0 ->
          pr2 "PB: found synchro point } in paren";
          [List.rev acc_before_sep], List.rev extras, tok::rest

      | TCPar _ | TCParEOL _ ->
          [List.rev acc_before_sep], List.rev (tok::extras), rest

      | TOPar _ | TOParDefine _ ->
          (* nested group: parse it fully, then continue the current
           * argument with the nested group as one element *)
          let nested_args, nested_parens, rest =
            mk_parameters [tok] [] rest in
          let nested = Parenthised (nested_args, nested_parens) in
          mk_parameters extras (nested::acc_before_sep) rest

      | TComma _ ->
          (* close the current argument; the following ones come from the
           * recursive call *)
          let next_args, all_extras, rest =
            mk_parameters (tok::extras) [] rest in
          (List.rev acc_before_sep)::next_args, all_extras, rest

      | _ ->
          mk_parameters extras (PToken tok::acc_before_sep) rest
      )
178
179
180
181
(* Build the brace view of the token list [xs]: every opening brace
 * starts a Braceised group (built by mk_braceised_aux), every other
 * token becomes a BToken. A closing brace met at this level has no
 * matching opening brace; it is reported and kept as a BToken. *)
let rec mk_braceised xs =
  let rec group_all rev_groups toks =
    match toks with
    | [] -> List.rev rev_groups
    | tok::rest ->
        (match tok.tok with
        | TOBrace _ ->
            let body, closing, rest = mk_braceised_aux [] rest in
            group_all (Braceised (body, tok, closing)::rev_groups) rest
        | TCBrace _ ->
            pr2 "PB: found closing brace alone in fuzzy parsing";
            group_all (BToken tok::rev_groups) rest
        | _ ->
            group_all (BToken tok::rev_groups) rest
        )
  in
  group_all [] xs

(* Parse the inside of a braced group. [acc] is the reversed content read
 * so far, [xs] the remaining tokens.
 *
 * Return (body, closing brace if found, remaining tokens). The body is
 * always a one-element list.
 *)
and mk_braceised_aux acc xs =
  match xs with
  | [] ->
      (* maybe because of #ifdef which "opens" '{' in 2 branches *)
      pr2 "PB: not found closing brace in fuzzy parsing";
      [List.rev acc], None, []
  | tok::rest ->
      (match tok.tok with
      | TCBrace _ ->
          [List.rev acc], Some tok, rest
      | TOBrace _ ->
          (* nested group: parse it fully, then continue the current one *)
          let nested_body, nested_closing, rest = mk_braceised_aux [] rest in
          let nested = Braceised (nested_body, tok, nested_closing) in
          mk_braceised_aux (nested::acc) rest
      | _ ->
          mk_braceised_aux (BToken tok::acc) rest
      )
213
708f4980
C
214
215
ae4735db
C
216
(* Build the ifdef view of the token list [xs].
 *
 * A conditional directive token starts an Ifdef/Ifdefbool group (built by
 * mk_ifdef_parameters); anything else is grouped with the following
 * tokens of the same line into a NotIfdefLine.
 *
 * The top-level traversal uses an accumulator, like mk_parenthised and
 * mk_braceised, so that the stack depth does not grow with the number of
 * lines of the file; only the nesting of conditionals uses the stack.
 *)
let rec mk_ifdef xs =
  let rec loop acc = function
    | [] -> acc
    | x::xs ->
        (match x.tok with
        | TIfdef _ ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            loop (Ifdef (body, extra)::acc) xs

        | TIfdefBool (b,_,_) ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
            let group =
              if !Flag_parsing_c.if0_passing
              then Ifdefbool (b, body, extra)
              else Ifdef (body, extra)
            in
            loop (group::acc) xs

        | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            loop (Ifdefbool (b, body, extra)::acc) xs

        | _ ->
            (* todo? can have some Ifdef in the line ? *)
            let line, xs =
              Common.span (fun y -> y.line =|= x.line) (x::xs) in
            loop (NotIfdefLine line::acc) xs
        )
  in
  List.rev (loop [] xs)

(* Parse the inside of a conditional, up to its #endif.
 *
 * [extras] is the reversed list of directive tokens of this conditional
 * seen so far (the opening directive, then #else/#elif),
 * [acc_before_sep] the reversed content of the branch being read, [xs]
 * the remaining tokens.
 *
 * Return (branches, directive tokens in source order, remaining tokens).
 * When the end of the tokens is reached before a #endif, a message is
 * logged and what was read so far is returned.
 *)
and mk_ifdef_parameters extras acc_before_sep xs =
  match xs with
  | [] ->
      (* Note that mk_ifdef is assuming that CPP instruction are alone
       * on their line. Because I do a span (fun x -> is_same_line ...)
       * I might take with me a #endif if this one is mixed on a line
       * with some "normal" tokens.
       *)
      pr2 "PB: not found closing ifdef in fuzzy parsing";
      [List.rev acc_before_sep], List.rev extras, []
  | x::xs ->
      (match x.tok with
      | TEndif _ ->
          [List.rev acc_before_sep], List.rev (x::extras), xs

      | TIfdef _ ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          mk_ifdef_parameters
            extras (Ifdef (body, extrasnest)::acc_before_sep) xs

      | TIfdefBool (b,_,_) ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          (* same rule as in mk_ifdef: without if0_passing a #if 0 is an
           * ordinary #ifdef *)
          let nested =
            if !Flag_parsing_c.if0_passing
            then Ifdefbool (b, body, extrasnest)
            else Ifdef (body, extrasnest)
          in
          mk_ifdef_parameters extras (nested::acc_before_sep) xs

      | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          mk_ifdef_parameters
            extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs

      | TIfdefelse _
      | TIfdefelif _ ->
          (* close the current branch; the following ones come from the
           * recursive call *)
          let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
          (List.rev acc_before_sep)::body, extras, xs

      | _ ->
          let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
          mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
      )
288
289(* --------------------------------------- *)
290
(* Line of a paren_grouped element: the line of the token itself, or the
 * line of the first delimiter token of a Parenthised group.
 * Raise Impossible on a Parenthised group without delimiter tokens. *)
let line_of_paren = function
  | PToken tok -> tok.line
  | Parenthised (_, []) -> raise Impossible
  | Parenthised (_, first_paren::_) -> first_paren.line
298
299
(* Split a paren_grouped list into its longest prefix of elements whose
 * line (see line_of_paren) is [line], and the rest. An EOF token always
 * stops the prefix, whatever its line. *)
let rec span_line_paren line = function
  | [] -> [], []
  | (PToken tok::_) as all when TH.is_eof tok.tok ->
      [], all
  | elt::others when line_of_paren elt =|= line ->
      let (same_line, rest) = span_line_paren line others in
      (elt::same_line, rest)
  | all ->
      [], all
708f4980 313
ae4735db
C
314
(* Group a paren_grouped list by source line: each Line holds an element
 * and the elements following it on the same line (as decided by
 * span_line_paren).
 *
 * The traversal uses an accumulator, like mk_parenthised, so that the
 * stack depth does not grow with the number of lines.
 * Raise Impossible if line_of_paren does. *)
let mk_line_parenthised xs =
  let rec loop acc = function
    | [] -> acc
    | x::xs ->
        let line_no = line_of_paren x in
        let line, rest = span_line_paren line_no xs in
        loop (Line (x::line)::acc) rest
  in
  List.rev (loop [] xs)
322
323
324(* --------------------------------------- *)
ae4735db
C
(* Split the tokens between column-zero brace bodies and the rest.
 *
 * A '{' in column zero opens a body that extends up to the next '}' in
 * column zero; the tokens strictly in between form a BodyFunction (the
 * two braces themselves are not part of the result). When no such '}'
 * exists, a message is logged and the remaining tokens are returned as a
 * last NotBodyLine. Any other token is grouped with the following tokens
 * of the same line into a NotBodyLine.
 *
 * The traversal uses an accumulator, like mk_parenthised, so that the
 * stack depth does not grow with the size of the file. *)
let mk_body_function_grouped xs =
  let is_closing_brace = function
    | {tok = TCBrace _; col = 0 } -> true
    | _ -> false
  in
  let rec loop acc = function
    | [] -> acc
    | x::xs ->
        (match x with
        | {tok = TOBrace _; col = 0} ->
            let body, rest =
              Common.span (fun y -> not (is_closing_brace y)) xs in
            (match rest with
            | closing::rest when is_closing_brace closing ->
                loop (BodyFunction body::acc) rest
            | [] ->
                pr2 "PB:not found closing brace in fuzzy parsing";
                NotBodyLine body::acc
            (* the span above stops only on a closing brace or at the end *)
            | _ -> raise Impossible
            )

        | _ ->
            let line, rest =
              Common.span (fun y -> y.line =|= x.line) (x::xs) in
            loop (NotBodyLine line::acc) rest
        )
  in
  List.rev (loop [] xs)
349
350
351(* ------------------------------------------------------------------------- *)
352(* view iterators *)
353(* ------------------------------------------------------------------------- *)
354
(* Apply [f] to every token of a paren view. For a Parenthised group the
 * delimiter tokens are visited first, then the content of each argument;
 * the visit is thus not in source order (see tokens_of_paren_ordered). *)
let rec iter_token_paren f xs =
  let visit = function
    | PToken tok ->
        f tok
    | Parenthised (args, parens) ->
        List.iter f parens;
        List.iter (iter_token_paren f) args
  in
  List.iter visit xs
362
(* Apply [f] to every token of a brace view. For a Braceised group the
 * opening brace is visited first, then the closing brace when there is
 * one, then the content. *)
let rec iter_token_brace f xs =
  let visit = function
    | BToken tok ->
        f tok
    | Braceised (body, obrace, cbrace_opt) ->
        f obrace;
        do_option f cbrace_opt;
        List.iter (iter_token_brace f) body
  in
  List.iter visit xs
370
(* Apply [f] to every token of an ifdef view. For a conditional the
 * directive tokens are visited first, then the content of each branch. *)
let rec iter_token_ifdef f xs =
  let visit = function
    | NotIfdefLine toks ->
        List.iter f toks
    | Ifdefbool (_, branches, directives)
    | Ifdef (branches, directives) ->
        List.iter f directives;
        List.iter (iter_token_ifdef f) branches
  in
  List.iter visit xs
379
380
381
382
(* All the tokens of a paren view, in the visiting order of
 * iter_token_paren. *)
let tokens_of_paren xs =
  let rev_toks = ref [] in
  iter_token_paren (fun tok -> push2 tok rev_toks) xs;
  List.rev !rev_toks
387
388
(* All the tokens of a paren view, in source order: for each Parenthised
 * group the opening paren, then the arguments interleaved with their
 * commas, then the closing paren.
 *
 * Raise Impossible when a group has fewer than two delimiter tokens, or
 * when the number of commas does not match the number of arguments. *)
let tokens_of_paren_ordered xs =
  let rev_toks = ref [] in
  let emit tok = push2 tok rev_toks in

  (* split the delimiter tokens into (first, last, in-between) *)
  let split_parens = function
    | opar::others ->
        (match List.rev others with
        | cpar::rev_commas -> opar, cpar, List.rev rev_commas
        | [] -> raise Impossible
        )
    | [] -> raise Impossible
  in

  let rec emit_elt = function
    | PToken tok ->
        emit tok
    | Parenthised (args, parens) ->
        let (opar, cpar, commas) = split_parens parens in
        emit opar;
        emit_args args commas;
        emit cpar

  and emit_args args commas =
    match args, commas with
    | [], [] -> ()
    | [last], [] ->
        List.iter emit_elt last
    | arg::next::others, comma::commas ->
        List.iter emit_elt arg;
        emit comma;
        emit_args (next::others) commas
    | _ -> raise Impossible
  in

  List.iter emit_elt xs;
  List.rev !rev_toks
423
424
425
426(* ------------------------------------------------------------------------- *)
427(* set the context info in token *)
428(* ------------------------------------------------------------------------- *)
429
430
ae4735db
C
(* Tag as InFunction the tokens inside the top-level braced groups that
 * look like function bodies. Only the elements of [xs] themselves are
 * examined; the braces of a tagged group are not tagged.
 *
 * could try: ) { } but it can be the ) of a if or while, so
 * better to base the heuristic on the position in column zero.
 * Note that some struct or enum or init put also their { in first column
 * but set_in_other will overwrite the previous InFunction tag.
 *)
let rec set_in_function_tag xs =
  let mark_body body =
    List.iter (iter_token_brace (fun tok -> tok.where <- InFunction)) body
  in
  match xs with
  | [] -> ()

  (* ) { and the closing } is in column zero, then certainly a function *)
  | BToken ({tok = TCPar _ })::(Braceised (body, obrace, Some cbrace))::rest
    when obrace.col <> 0 && cbrace.col =|= 0 ->
      mark_body body;
      set_in_function_tag rest

  | (BToken _)::rest ->
      set_in_function_tag rest

  (* both braces in column zero *)
  | (Braceised (body, obrace, Some cbrace))::rest
    when obrace.col =|= 0 && cbrace.col =|= 0 ->
      mark_body body;
      set_in_function_tag rest

  | (Braceised _)::rest ->
      set_in_function_tag rest
708f4980 457
ae4735db
C
458
(* Tag the tokens inside enum, struct and initializer braces with
 * InEnum, InStruct and InInitializer respectively, overwriting any
 * previous tag. A braced group that matches none of the patterns is
 * searched recursively; a matching one is tagged as a whole and not
 * searched further. *)
let rec set_in_other xs =
  let mark ctx body =
    List.iter (iter_token_brace (fun tok -> tok.where <- ctx)) body
  in
  match xs with
  | [] -> ()

  (* enum x { } and enum { } *)
  | BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
    ::Braceised (body, _, _)::rest
  | BToken ({tok = Tenum _})
    ::Braceised (body, _, _)::rest ->
      mark InEnum body;
      set_in_other rest

  (* struct x { } *)
  | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
    ::Braceised (body, _, _)::rest ->
      mark InStruct body;
      set_in_other rest

  (* = { } *)
  | BToken ({tok = TEq _})
    ::Braceised (body, _, _)::rest ->
      mark InInitializer body;
      set_in_other rest

  | BToken _::rest ->
      set_in_other rest

  | Braceised (body, _, _)::rest ->
      List.iter set_in_other body;
      set_in_other rest
493
708f4980 494
ae4735db
C
495
496
(* Set the [where] field of the tokens of the brace view [xs]: first the
 * function heuristic, then the enum/struct/initializer one, which
 * overwrites the tags set by the first. *)
let set_context_tag xs =
  set_in_function_tag xs;
  set_in_other xs
ae4735db 502