Release coccinelle-0.1.8
[bpt/coccinelle.git] / parsing_c / token_views_c.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15 open Common
16
17 module TH = Token_helpers
18
19 open Parser_c
20
21 (*****************************************************************************)
22 (* Some debugging functions *)
23 (*****************************************************************************)
24
25 let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
26
27 (* ------------------------------------------------------------------------- *)
28 (* fuzzy parsing, different "views" over the same program *)
29 (* ------------------------------------------------------------------------- *)
30
31
32 (* Normally I should not use ref/mutable in the token_extended type
33 * and I should have a set of functions taking a list of tokens and
34 * returning a list of tokens. The problem is that to make easier some
35 * functions, it is better to work on better representation, on "views"
36 * over this list of tokens. But then modifying those views and get
37 * back from those views to the original simple list of tokens is
38 * tedious. One way is to maintain next to the view a list of "actions"
39 * (I was using a hash storing the charpos of the token and associating
40 * the action) but it is tedious too. Simpler to use mutable/ref. We
41 * use the same idea that we use when working on the Ast_c. *)
42
43 (* old: when I was using the list of "actions" next to the views, the hash
44 * indexed by the charpos, there could have been some problems:
45 * how my fake_pos interact with the way I tag and adjust token ?
46 * because I base my tagging on the position of the token ! so sometimes
47 * could tag another fakeInfo that should not be tagged ?
48 * fortunately I don't use anymore this technique.
49 *)
50
51 (* update: quite close to the Place_c.Inxxx *)
52 type context =
53 InFunction | InEnum | InStruct | InInitializer | NoContext
54
55 type token_extended = {
56 mutable tok: Parser_c.token;
57 mutable where: context;
58
59 (* less: need also a after ? *)
60 mutable new_tokens_before : Parser_c.token list;
61
62 (* line x col cache, more easily accessible, of the info in the token *)
63 line: int;
64 col : int;
65 }
66
67 (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
68 * can confuse the parser.
69 *)
70 let set_as_comment cppkind x =
71 if TH.is_eof x.tok
72 then () (* otherwise parse_c will be lost if don't find a EOF token *)
73 else
74 x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
75
76 let mk_token_extended x =
77 let (line, col) = TH.linecol_of_tok x in
78 { tok = x;
79 line = line; col = col;
80 where = NoContext;
81 new_tokens_before = [];
82 }
83
84
85 let rebuild_tokens_extented toks_ext =
86 let _tokens = ref [] in
87 toks_ext +> List.iter (fun tok ->
88 tok.new_tokens_before +> List.iter (fun x -> push2 x _tokens);
89 push2 tok.tok _tokens
90 );
91 let tokens = List.rev !_tokens in
92 (tokens +> acc_map mk_token_extended)
93
94
95
96 (* x list list, because x list separated by ',' *)
97 type paren_grouped =
98 | Parenthised of paren_grouped list list * token_extended list
99 | PToken of token_extended
100
101 type brace_grouped =
102 | Braceised of
103 brace_grouped list list * token_extended * token_extended option
104 | BToken of token_extended
105
106 (* Far better data structure than doing hacks in the lexer or parser
107 * because in lexer we don't know to which ifdef a endif is related
108 * and so when we want to comment a ifdef, we don't know which endif
109 * we must also comment. Especially true for the #if 0 which sometimes
110 * have a #else part.
111 *
112 * x list list, because x list separated by #else or #elif
113 *)
114 type ifdef_grouped =
115 | Ifdef of ifdef_grouped list list * token_extended list
116 | Ifdefbool of bool * ifdef_grouped list list * token_extended list
117 | NotIfdefLine of token_extended list
118
119
120 type 'a line_grouped =
121 Line of 'a list
122
123
124 type body_function_grouped =
125 | BodyFunction of token_extended list
126 | NotBodyLine of token_extended list
127
128
129 (* ------------------------------------------------------------------------- *)
130 (* view builders *)
131 (* ------------------------------------------------------------------------- *)
132
133 (* todo: synchro ! use more indentation
134 * if paren not closed and same indentation level, certainly because
135 * part of a mid-ifdef-expression.
136 *)
137 let rec mk_parenthised xs =
138 match xs with
139 | [] -> []
140 | x::xs ->
141 (match x.tok with
142 | TOPar _ | TOParDefine _ ->
143 let body, extras, xs = mk_parameters [x] [] xs in
144 Parenthised (body,extras)::mk_parenthised xs
145 | _ ->
146 PToken x::mk_parenthised xs
147 )
148
149 (* return the body of the parenthised expression and the rest of the tokens *)
150 and mk_parameters extras acc_before_sep xs =
151 match xs with
152 | [] ->
153 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
154 pr2 "PB: not found closing paren in fuzzy parsing";
155 [List.rev acc_before_sep], List.rev extras, []
156 | x::xs ->
157 (match x.tok with
158 (* synchro *)
159 | TOBrace _ when x.col =|= 0 ->
160 pr2 "PB: found synchro point } in paren";
161 [List.rev acc_before_sep], List.rev (extras), (x::xs)
162
163 | TCPar _ | TCParEOL _ ->
164 [List.rev acc_before_sep], List.rev (x::extras), xs
165 | TOPar _ | TOParDefine _ ->
166 let body, extrasnest, xs = mk_parameters [x] [] xs in
167 mk_parameters extras
168 (Parenthised (body,extrasnest)::acc_before_sep)
169 xs
170 | TComma _ ->
171 let body, extras, xs = mk_parameters (x::extras) [] xs in
172 (List.rev acc_before_sep)::body, extras, xs
173 | _ ->
174 mk_parameters extras (PToken x::acc_before_sep) xs
175 )
176
177
178
179
180 let rec mk_braceised xs =
181 match xs with
182 | [] -> []
183 | x::xs ->
184 (match x.tok with
185 | TOBrace _ ->
186 let body, endbrace, xs = mk_braceised_aux [] xs in
187 Braceised (body, x, endbrace)::mk_braceised xs
188 | TCBrace _ ->
189 pr2 "PB: found closing brace alone in fuzzy parsing";
190 BToken x::mk_braceised xs
191 | _ ->
192 BToken x::mk_braceised xs
193 )
194
195 (* return the body of the parenthised expression and the rest of the tokens *)
196 and mk_braceised_aux acc xs =
197 match xs with
198 | [] ->
199 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
200 pr2 "PB: not found closing brace in fuzzy parsing";
201 [List.rev acc], None, []
202 | x::xs ->
203 (match x.tok with
204 | TCBrace _ -> [List.rev acc], Some x, xs
205 | TOBrace _ ->
206 let body, endbrace, xs = mk_braceised_aux [] xs in
207 mk_braceised_aux (Braceised (body,x, endbrace)::acc) xs
208 | _ ->
209 mk_braceised_aux (BToken x::acc) xs
210 )
211
212
213
214
215 let rec mk_ifdef xs =
216 match xs with
217 | [] -> []
218 | x::xs ->
219 (match x.tok with
220 | TIfdef _ ->
221 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
222 Ifdef (body, extra)::mk_ifdef xs
223 | TIfdefBool (b,_, _) ->
224 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
225
226 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
227 if !Flag_parsing_c.if0_passing
228 then Ifdefbool (b, body, extra)::mk_ifdef xs
229 else Ifdef(body, extra)::mk_ifdef xs
230
231 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
232 let body, extra, xs = mk_ifdef_parameters [x] [] xs in
233 Ifdefbool (b, body, extra)::mk_ifdef xs
234
235
236 | _ ->
237 (* todo? can have some Ifdef in the line ? *)
238 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
239 NotIfdefLine line::mk_ifdef xs
240 )
241
242 and mk_ifdef_parameters extras acc_before_sep xs =
243 match xs with
244 | [] ->
245 (* Note that mk_ifdef is assuming that CPP instruction are alone
246 * on their line. Because I do a span (fun x -> is_same_line ...)
247 * I might take with me a #endif if this one is mixed on a line
248 * with some "normal" tokens.
249 *)
250 pr2 "PB: not found closing ifdef in fuzzy parsing";
251 [List.rev acc_before_sep], List.rev extras, []
252 | x::xs ->
253 (match x.tok with
254 | TEndif _ ->
255 [List.rev acc_before_sep], List.rev (x::extras), xs
256 | TIfdef _ ->
257 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
258 mk_ifdef_parameters
259 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
260
261 | TIfdefBool (b,_,_) ->
262 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
263
264 if !Flag_parsing_c.if0_passing
265 then
266 mk_ifdef_parameters
267 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
268 else
269 mk_ifdef_parameters
270 extras (Ifdef (body, extrasnest)::acc_before_sep) xs
271
272
273 | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
274 let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
275 mk_ifdef_parameters
276 extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
277
278 | TIfdefelse _
279 | TIfdefelif _ ->
280 let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
281 (List.rev acc_before_sep)::body, extras, xs
282 | _ ->
283 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
284 mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
285 )
286
287 (* --------------------------------------- *)
288
289 let line_of_paren = function
290 | PToken x -> x.line
291 | Parenthised (xxs, info_parens) ->
292 (match info_parens with
293 | [] -> raise Impossible
294 | x::xs -> x.line
295 )
296
297
298 let rec span_line_paren line = function
299 | [] -> [],[]
300 | x::xs ->
301 (match x with
302 | PToken tok when TH.is_eof tok.tok ->
303 [], x::xs
304 | _ ->
305 if line_of_paren x =|= line
306 then
307 let (l1, l2) = span_line_paren line xs in
308 (x::l1, l2)
309 else ([], x::xs)
310 )
311
312
313 let rec mk_line_parenthised xs =
314 match xs with
315 | [] -> []
316 | x::xs ->
317 let line_no = line_of_paren x in
318 let line, xs = span_line_paren line_no xs in
319 Line (x::line)::mk_line_parenthised xs
320
321
322 (* --------------------------------------- *)
323 let rec mk_body_function_grouped xs =
324 match xs with
325 | [] -> []
326 | x::xs ->
327 (match x with
328 | {tok = TOBrace _; col = 0} ->
329 let is_closing_brace = function
330 | {tok = TCBrace _; col = 0 } -> true
331 | _ -> false
332 in
333 let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in
334 (match xs with
335 | ({tok = TCBrace _; col = 0 })::xs ->
336 BodyFunction body::mk_body_function_grouped xs
337 | [] ->
338 pr2 "PB:not found closing brace in fuzzy parsing";
339 [NotBodyLine body]
340 | _ -> raise Impossible
341 )
342
343 | _ ->
344 let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
345 NotBodyLine line::mk_body_function_grouped xs
346 )
347
348
349 (* ------------------------------------------------------------------------- *)
350 (* view iterators *)
351 (* ------------------------------------------------------------------------- *)
352
353 let rec iter_token_paren f xs =
354 xs +> List.iter (function
355 | PToken tok -> f tok;
356 | Parenthised (xxs, info_parens) ->
357 info_parens +> List.iter f;
358 xxs +> List.iter (fun xs -> iter_token_paren f xs)
359 )
360
361 let rec iter_token_brace f xs =
362 xs +> List.iter (function
363 | BToken tok -> f tok;
364 | Braceised (xxs, tok1, tok2opt) ->
365 f tok1; do_option f tok2opt;
366 xxs +> List.iter (fun xs -> iter_token_brace f xs)
367 )
368
369 let rec iter_token_ifdef f xs =
370 xs +> List.iter (function
371 | NotIfdefLine xs -> xs +> List.iter f;
372 | Ifdefbool (_, xxs, info_ifdef)
373 | Ifdef (xxs, info_ifdef) ->
374 info_ifdef +> List.iter f;
375 xxs +> List.iter (iter_token_ifdef f)
376 )
377
378
379
380
381 let tokens_of_paren xs =
382 let g = ref [] in
383 xs +> iter_token_paren (fun tok -> push2 tok g);
384 List.rev !g
385
386
387 let tokens_of_paren_ordered xs =
388 let g = ref [] in
389
390 let rec aux_tokens_ordered = function
391 | PToken tok -> push2 tok g;
392 | Parenthised (xxs, info_parens) ->
393 let (opar, cpar, commas) =
394 match info_parens with
395 | opar::xs ->
396 (match List.rev xs with
397 | cpar::xs ->
398 opar, cpar, List.rev xs
399 | _ -> raise Impossible
400 )
401 | _ -> raise Impossible
402 in
403 push2 opar g;
404 aux_args (xxs,commas);
405 push2 cpar g;
406
407 and aux_args (xxs, commas) =
408 match xxs, commas with
409 | [], [] -> ()
410 | [xs], [] -> xs +> List.iter aux_tokens_ordered
411 | xs::ys::xxs, comma::commas ->
412 xs +> List.iter aux_tokens_ordered;
413 push2 comma g;
414 aux_args (ys::xxs, commas)
415 | _ -> raise Impossible
416
417 in
418
419 xs +> List.iter aux_tokens_ordered;
420 List.rev !g
421
422
423
424 (* ------------------------------------------------------------------------- *)
425 (* set the context info in token *)
426 (* ------------------------------------------------------------------------- *)
427
428
429 let rec set_in_function_tag xs =
430 (* could try: ) { } but it can be the ) of a if or while, so
431 * better to base the heuristic on the position in column zero.
432 * Note that some struct or enum or init put also their { in first column
433 * but set_in_other will overwrite the previous InFunction tag.
434 *)
435 match xs with
436 | [] -> ()
437 (* ) { and the closing } is in column zero, then certainly a function *)
438 | BToken ({tok = TCPar _ })::(Braceised (body, tok1, Some tok2))::xs
439 when tok1.col <> 0 && tok2.col =|= 0 ->
440 body +> List.iter (iter_token_brace (fun tok ->
441 tok.where <- InFunction
442 ));
443 set_in_function_tag xs
444
445 | (BToken x)::xs -> set_in_function_tag xs
446
447 | (Braceised (body, tok1, Some tok2))::xs
448 when tok1.col =|= 0 && tok2.col =|= 0 ->
449 body +> List.iter (iter_token_brace (fun tok ->
450 tok.where <- InFunction
451 ));
452 set_in_function_tag xs
453 | Braceised (body, tok1, tok2)::xs ->
454 set_in_function_tag xs
455
456
457 let rec set_in_other xs =
458 match xs with
459 | [] -> ()
460 (* enum x { } *)
461 | BToken ({tok = Tenum _})::BToken ({tok = TIdent _})
462 ::Braceised(body, tok1, tok2)::xs
463 | BToken ({tok = Tenum _})
464 ::Braceised(body, tok1, tok2)::xs
465 ->
466 body +> List.iter (iter_token_brace (fun tok ->
467 tok.where <- InEnum;
468 ));
469 set_in_other xs
470
471 (* struct x { } *)
472 | BToken ({tok = Tstruct _})::BToken ({tok = TIdent _})
473 ::Braceised(body, tok1, tok2)::xs ->
474 body +> List.iter (iter_token_brace (fun tok ->
475 tok.where <- InStruct;
476 ));
477 set_in_other xs
478 (* = { } *)
479 | BToken ({tok = TEq _})
480 ::Braceised(body, tok1, tok2)::xs ->
481 body +> List.iter (iter_token_brace (fun tok ->
482 tok.where <- InInitializer;
483 ));
484 set_in_other xs
485
486 | BToken _::xs -> set_in_other xs
487
488 | Braceised(body, tok1, tok2)::xs ->
489 body +> List.iter set_in_other;
490 set_in_other xs
491
492
493
494
495 let set_context_tag xs =
496 begin
497 set_in_function_tag xs;
498 set_in_other xs;
499 end
500