Release coccinelle-0.1.11rc1
[bpt/coccinelle.git] / parsing_c / token_views_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15 open Common
16
17 module TH = Token_helpers
18
19 open Parser_c
20
21 (*****************************************************************************)
22 (* Some debugging functions *)
23 (*****************************************************************************)
24
25 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
26
27 (* ------------------------------------------------------------------------- *)
28 (* fuzzy parsing, different "views" over the same program *)
29 (* ------------------------------------------------------------------------- *)
30
31
32 (* Normally I should not use ref/mutable in the token_extended type
33 * and I should have a set of functions taking a list of tokens and
34 * returning a list of tokens. The problem is that to make easier some
35 * functions, it is better to work on better representation, on "views"
36 * over this list of tokens. But then modifying those views and get
37 * back from those views to the original simple list of tokens is
38 * tedious. One way is to maintain next to the view a list of "actions"
39 * (I was using a hash storing the charpos of the token and associating
40 * the action) but it is tedious too. Simpler to use mutable/ref. We
41 * use the same idea that we use when working on the Ast_c. *)
42
43 (* old: when I was using the list of "actions" next to the views, the hash
44 * indexed by the charpos, there could have been some problems:
45 * how my fake_pos interact with the way I tag and adjust token ?
46 * because I base my tagging on the position of the token ! so sometimes
47 * could tag another fakeInfo that should not be tagged ?
48 * fortunately I don't use anymore this technique.
49 *)
50
51 (* update: quite close to the Place_c.Inxxx *)
52 type context =
53 InFunction | InEnum | InStruct | InInitializer | NoContext
54
55 type token_extended = {
56 mutable tok: Parser_c.token;
57 mutable where: context;
58
59 (* less: need also a after ? *)
60 mutable new_tokens_before : Parser_c.token list;
61
62 (* line x col cache, more easily accessible, of the info in the token *)
63 line: int;
64 col : int;
65 }
66
67 (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
68 * can confuse the parser.
69 *)
70 let set_as_comment cppkind x =
71 if TH.is_eof x.tok
72 then () (* otherwise parse_c will be lost if don't find a EOF token *)
73 else
74 x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
75
76 let mk_token_extended x =
77 let (line, col) = TH.linecol_of_tok x in
78 { tok = x;
79 line = line; col = col;
80 where = NoContext;
81 new_tokens_before = [];
82 }
83
84
85 let rebuild_tokens_extented toks_ext =
86 let _tokens = ref [] in
87 toks_ext +> List.iter (fun tok ->
88 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
89 push2 tok.tok _tokens
90 );
91 let tokens = List.rev !_tokens in
92 (tokens +> acc_map mk_token_extended)
93
94
95
96 (* x list list, because x list separated by ',' *)
97 type paren_grouped =
98 | Parenthised of paren_grouped list list * token_extended list
99 | PToken of token_extended
100
101 type brace_grouped =
102 | Braceised of
103 brace_grouped list list * token_extended * token_extended option
104 | BToken of token_extended
105
106 (* Far better data structure than doing hacks in the lexer or parser
107 * because in lexer we don't know to which ifdef a endif is related
108 * and so when we want to comment a ifdef, we don't know which endif
109 * we must also comment. Especially true for the #if 0 which sometimes
110 * have a #else part.
111 *
112 * x list list, because x list separated by #else or #elif
113 *)
114 type ifdef_grouped =
115 | Ifdef of ifdef_grouped list list * token_extended list
116 | Ifdefbool of bool * ifdef_grouped list list * token_extended list
117 | NotIfdefLine of token_extended list
118
119
120 type 'a line_grouped =
121 Line of 'a list
122
123
124 type body_function_grouped =
125 | BodyFunction of token_extended list
126 | NotBodyLine of token_extended list
127
128
129 (* ------------------------------------------------------------------------- *)
130 (* view builders *)
131 (* ------------------------------------------------------------------------- *)
132
133 (* todo: synchro ! use more indentation
134 * if paren not closed and same indentation level, certainly because
135 * part of a mid-ifdef-expression.
136 *)
137 let rec mk_parenthised xs =
138 let rec loop acc = function
139 | [] -> acc
140 | x::xs ->
141 (match x.tok with
142 | TOPar _ | TOParDefine _ ->
143 let body, extras, xs = mk_parameters [x] [] xs in
144 loop (Parenthised (body,extras)::acc) xs
145 | _ ->
146 loop (PToken x::acc) xs
147 ) in
148 List.rev(loop [] xs)
149
150 (* return the body of the parenthised expression and the rest of the tokens *)
151 and mk_parameters extras acc_before_sep xs =
152 match xs with
153 | [] ->
154 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
155 pr2 "PB: not found closing paren in fuzzy parsing";
156 [List.rev acc_before_sep], List.rev extras, []
157 | x::xs ->
158 (match x.tok with
159 (* synchro *)
160 | TOBrace _ when x.col =|= 0 ->
161 pr2 "PB: found synchro point } in paren";
162 [List.rev acc_before_sep], List.rev (extras), (x::xs)
163
164 | TCPar _ | TCParEOL _ ->
165 [List.rev acc_before_sep], List.rev (x::extras), xs
166 | TOPar _ | TOParDefine _ ->
167 let body, extrasnest, xs = mk_parameters [x] [] xs in
168 mk_parameters extras
169 (Parenthised (body,extrasnest)::acc_before_sep)
170 xs
171 | TComma _ ->
172 let body, extras, xs = mk_parameters (x::extras) [] xs in
173 (List.rev acc_before_sep)::body, extras, xs
174 | _ ->
175 mk_parameters extras (PToken x::acc_before_sep) xs
176 )
177
178
179
180
181 let rec mk_braceised xs =
182 let rec loop acc = function
183 | [] -> acc
184 | x::xs ->
185 (match x.tok with
186 | TOBrace _ ->
187 let body, endbrace, xs = mk_braceised_aux [] xs in
188 loop (Braceised (body, x, endbrace)::acc) xs
189 | TCBrace _ ->
190 pr2 "PB: found closing brace alone in fuzzy parsing";
191 loop (BToken x::acc) xs
192 | _ ->
193 loop (BToken x::acc) xs) in
194 List.rev(loop [] xs)
195
196 (* return the body of the parenthised expression and the rest of the tokens *)
197 and mk_braceised_aux acc xs =
198 match xs with
199 | [] ->
200 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
201 pr2 "PB: not found closing brace in fuzzy parsing";
202 [List.rev acc], None, []
203 | x::xs ->
204 (match x.tok with
205 | TCBrace _ -> [List.rev acc], Some x, xs
206 | TOBrace _ ->
207 let body, endbrace, xs = mk_braceised_aux [] xs in
208 mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs
209 | _ ->
210 mk_braceised_aux (BToken x::acc) xs
211 )
212
213
214
215
216 let rec mk_ifdef xs =
217 match xs with
218 | [] -> []
219 | x::xs ->
220 (match x.tok with
221 | TIfdef _ ->
222 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
223 Ifdef (body, extra)::mk_ifdef xs
224 | TIfdefBool (b,_, _) ->
225 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
226
227 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
228 if !Flag_parsing_c.if0_passing
229 then Ifdefbool (b, body, extra)::mk_ifdef xs
230 else Ifdef(body, extra)::mk_ifdef xs
231
232 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
233 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
234 Ifdefbool (b, body, extra)::mk_ifdef xs
235
236
237 | _ ->
238 (* todo? can have some Ifdef in the line ? *)
239 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
240 NotIfdefLine line::mk_ifdef xs
241 )
242
243 and mk_ifdef_parameters extras acc_before_sep xs =
244 match xs with
245 | [] ->
246 (* Note that mk_ifdef is assuming that CPP instruction are alone
247 * on their line. Because I do a span (fun x -> is_same_line ...)
248 * I might take with me a #endif if this one is mixed on a line
249 * with some "normal" tokens.
250 *)
251 pr2 "PB: not found closing ifdef in fuzzy parsing";
252 [List.rev acc_before_sep], List.rev extras, []
253 | x::xs ->
254 (match x.tok with
255 | TEndif _ ->
256 [List.rev acc_before_sep], List.rev (x::extras), xs
257 | TIfdef _ ->
258 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
259 mk_ifdef_parameters
260 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
261
262 | TIfdefBool (b,_,_) ->
263 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
264
265 if !Flag_parsing_c.if0_passing
266 then
267 mk_ifdef_parameters
268 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
269 else
270 mk_ifdef_parameters
271 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
272
273
274 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
275 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
276 mk_ifdef_parameters
277 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
278
279 | TIfdefelse _
280 | TIfdefelif _ ->
281 let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
282 (List.rev acc_before_sep)::body, extras, xs
283 | _ ->
284 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
285 mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
286 )
287
288 (* --------------------------------------- *)
289
290 let line_of_paren = function
291 | PToken x -> x.line
292 | Parenthised (xxs, info_parens) ->
293 (match info_parens with
294 | [] -> raise Impossible
295 | x::xs -> x.line
296 )
297
298
299 let rec span_line_paren line = function
300 | [] -> [],[]
301 | x::xs ->
302 (match x with
303 | PToken tok when TH.is_eof tok.tok ->
304 [], x::xs
305 | _ ->
306 if line_of_paren x =|= line
307 then
308 let (l1, l2) = span_line_paren line xs in
309 (x::l1, l2)
310 else ([], x::xs)
311 )
312
313
314 let rec mk_line_parenthised xs =
315 match xs with
316 | [] -> []
317 | x::xs ->
318 let line_no = line_of_paren x in
319 let line, xs = span_line_paren line_no xs in
320 Line (x::line)::mk_line_parenthised xs
321
322
323 (* --------------------------------------- *)
324 let rec mk_body_function_grouped xs =
325 match xs with
326 | [] -> []
327 | x::xs ->
328 (match x with
329 | {tok = TOBrace _; col = 0} ->
330 let is_closing_brace = function
331 | {tok = TCBrace _; col = 0 } -> true
332 | _ -> false
333 in
334 let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in
335 (match xs with
336 | ({tok = TCBrace _; col = 0 })::xs ->
337 BodyFunction body::mk_body_function_grouped xs
338 | [] ->
339 pr2 "PB:not found closing brace in fuzzy parsing";
340 [NotBodyLine body]
341 | _ -> raise Impossible
342 )
343
344 | _ ->
345 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
346 NotBodyLine line::mk_body_function_grouped xs
347 )
348
349
350 (* ------------------------------------------------------------------------- *)
351 (* view iterators *)
352 (* ------------------------------------------------------------------------- *)
353
354 let rec iter_token_paren f xs =
355 xs +> List.iter (function
356 | PToken tok -> f tok;
357 | Parenthised (xxs, info_parens) ->
358 info_parens +> List.iter f;
359 xxs +> List.iter (fun xs -> iter_token_paren f xs)
360 )
361
362 let rec iter_token_brace f xs =
363 xs +> List.iter (function
364 | BToken tok -> f tok;
365 | Braceised (xxs, tok1, tok2opt) ->
366 f tok1; do_option f tok2opt;
367 xxs +> List.iter (fun xs -> iter_token_brace f xs)
368 )
369
370 let rec iter_token_ifdef f xs =
371 xs +> List.iter (function
372 | NotIfdefLine xs -> xs +> List.iter f;
373 | Ifdefbool (_, xxs, info_ifdef)
374 | Ifdef (xxs, info_ifdef) ->
375 info_ifdef +> List.iter f;
376 xxs +> List.iter (iter_token_ifdef f)
377 )
378
379
380
381
382 let tokens_of_paren xs =
383 let g = ref [] in
384 xs +> iter_token_paren (fun tok -> push2 tok g);
385 List.rev !g
386
387
388 let tokens_of_paren_ordered xs =
389 let g = ref [] in
390
391 let rec aux_tokens_ordered = function
392 | PToken tok -> push2 tok g;
393 | Parenthised (xxs, info_parens) ->
394 let (opar, cpar, commas) =
395 match info_parens with
396 | opar::xs ->
397 (match List.rev xs with
398 | cpar::xs ->
399 opar, cpar, List.rev xs
400 | _ -> raise Impossible
401 )
402 | _ -> raise Impossible
403 in
404 push2 opar g;
405 aux_args (xxs,commas);
406 push2 cpar g;
407
408 and aux_args (xxs, commas) =
409 match xxs, commas with
410 | [], [] -> ()
411 | [xs], [] -> xs +> List.iter aux_tokens_ordered
412 | xs::ys::xxs, comma::commas ->
413 xs +> List.iter aux_tokens_ordered;
414 push2 comma g;
415 aux_args (ys::xxs, commas)
416 | _ -> raise Impossible
417
418 in
419
420 xs +> List.iter aux_tokens_ordered;
421 List.rev !g
422
423
424
425 (* ------------------------------------------------------------------------- *)
426 (* set the context info in token *)
427 (* ------------------------------------------------------------------------- *)
428
429
430 let rec set_in_function_tag xs =
431 (* could try: ) { } but it can be the ) of a if or while, so
432 * better to base the heuristic on the position in column zero.
433 * Note that some struct or enum or init put also their { in first column
434 * but set_in_other will overwrite the previous InFunction tag.
435 *)
436 match xs with
437 | [] -> ()
438 (* ) { and the closing } is in column zero, then certainly a function *)
439 | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs
440 when tok1.col <> 0 && tok2.col =|= 0 ->
441 body +> List.iter (iter_token_brace (fun tok ->
442 tok.where <- InFunction
443 ));
444 set_in_function_tag xs
445
446 | (BToken x)::xs -> set_in_function_tag xs
447
448 | (Braceised (body, tok1, Some tok2))::xs
449 when tok1.col =|= 0 && tok2.col =|= 0 ->
450 body +> List.iter (iter_token_brace (fun tok ->
451 tok.where <- InFunction
452 ));
453 set_in_function_tag xs
454 | Braceised (body, tok1, tok2)::xs ->
455 set_in_function_tag xs
456
457
458 let rec set_in_other xs =
459 match xs with
460 | [] -> ()
461 (* enum x { } *)
462 | BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
463 ::Braceised(body, tok1, tok2)::xs
464 | BToken ({tok = Tenum _})
465 ::Braceised(body, tok1, tok2)::xs
466 ->
467 body +> List.iter (iter_token_brace (fun tok ->
468 tok.where <- InEnum;
469 ));
470 set_in_other xs
471
472 (* struct x { } *)
473 | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
474 ::Braceised(body, tok1, tok2)::xs ->
475 body +> List.iter (iter_token_brace (fun tok ->
476 tok.where <- InStruct;
477 ));
478 set_in_other xs
479 (* = { } *)
480 | BToken ({tok = TEq _})
481 ::Braceised(body, tok1, tok2)::xs ->
482 body +> List.iter (iter_token_brace (fun tok ->
483 tok.where <- InInitializer;
484 ));
485 set_in_other xs
486
487 | BToken _::xs -> set_in_other xs
488
489 | Braceised(body, tok1, tok2)::xs ->
490 body +> List.iter set_in_other;
491 set_in_other xs
492
493
494
495
496 let set_context_tag xs =
497 begin
498 set_in_function_tag xs;
499 set_in_other xs;
500 end
501