permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / token_views_c.ml
CommitLineData
708f4980 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
708f4980
C
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
708f4980
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16open Common
17
ae4735db 18module TH = Token_helpers
708f4980 19
ae4735db 20open Parser_c
708f4980
C
21
22(*****************************************************************************)
23(* Some debugging functions *)
24(*****************************************************************************)
25
(* Logging helpers for this module; both are built by
 * [Common.mk_pr2_wrappers] from the [Flag_parsing_c.verbose_parsing] flag. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
708f4980
C
27
28(* ------------------------------------------------------------------------- *)
29(* fuzzy parsing, different "views" over the same program *)
30(* ------------------------------------------------------------------------- *)
31
32
(* Normally I should not use ref/mutable in the token_extended type
 * and I should have a set of functions taking a list of tokens and
 * returning a list of tokens. The problem is that, to make some
 * functions easier to write, it is better to work on a better
 * representation, on "views" over this list of tokens. But then
 * modifying those views and getting back from those views to the
 * original simple list of tokens is tedious. One way is to maintain
 * next to the view a list of "actions" (I was using a hash storing the
 * charpos of the token and associating the action) but it is tedious
 * too. Simpler to use mutable/ref. We use the same idea that we use
 * when working on the Ast_c. *)
43
(* old: when I was using the list of "actions" next to the views, the hash
 * indexed by the charpos, there could have been some problems:
 * how does my fake_pos interact with the way I tag and adjust tokens ?
 * because I base my tagging on the position of the token ! so sometimes
 * I could tag another fakeInfo that should not be tagged ?
 * fortunately I don't use this technique anymore.
 *)
51
(* Syntactic context a token can be tagged with (see the [where] field
 * of token_extended).
 * update: quite close to the Place_c.Inxxx *)
type context =
  | InFunction
  | InEnum
  | InStruct
  | InInitializer
  | NoContext
55
(* A lexer token plus the mutable annotations the fuzzy-parsing views
 * update in place. *)
type token_extended = {
  (* the wrapped token; mutable so that it can be retagged in place,
   * e.g. turned into a TCommentCpp *)
  mutable tok : Parser_c.token;

  (* context tag of the token *)
  mutable where : context;

  (* tokens to re-insert just before [tok] when the list is rebuilt.
   * less: need also a after ? *)
  mutable new_tokens_before : Parser_c.token list;

  (* line x col cache, more easily accessible, of the info in the token *)
  line : int;
  col : int;
}
67
(* Replace, in place, the token of [x] by a [TCommentCpp] of kind
 * [cppkind] carrying the info of the original token.
 * The EOF token is never touched, otherwise parse_c will be lost if it
 * doesn't find an EOF token.
 *
 * todo? is it ok to reset a TDefEOL as a comment ? if we do that, then
 * it can confuse the parser.
 *)
let set_as_comment cppkind x =
  if not (TH.is_eof x.tok)
  then x.tok <- TCommentCpp (cppkind, TH.info_of_tok x.tok)
76
feec80c3
C
(* Like set_as_comment, except that [cppkind] is a function: it receives
 * the kind of ifdef directive the token was (Token_c.IfDef, IfDef0,
 * Else, Endif, or Other for any other token) and returns the comment
 * kind to store. The EOF token is left untouched, otherwise parse_c
 * will be lost if it doesn't find an EOF token.
 *)
let save_as_comment cppkind x =
  if not (TH.is_eof x.tok) then begin
    let ifdef_kind =
      match x.tok with
      | TIfdef _ | TIfdefMisc _ | TIfdefVersion _ -> Token_c.IfDef
      | TIfdefBool _ -> Token_c.IfDef0
      | TIfdefelse _ | TIfdefelif _ -> Token_c.Else
      | TEndif _ -> Token_c.Endif
      | _ -> Token_c.Other
    in
    x.tok <- TCommentCpp (cppkind ifdef_kind, TH.info_of_tok x.tok)
  end
89
(* Wrap the token [x] into a fresh token_extended: no context, nothing to
 * insert before it, and the line/column cached from
 * [TH.linecol_of_tok]. *)
let mk_token_extended x =
  let pos = TH.linecol_of_tok x in
  { tok = x;
    where = NoContext;
    new_tokens_before = [];
    line = fst pos;
    col = snd pos;
  }
97
98
(* Flatten [toks_ext] back into a plain token sequence, splicing the
 * [new_tokens_before] of each element just before its own token, and
 * wrap every resulting token again with mk_token_extended (so the
 * annotations of the result are all fresh). *)
let rebuild_tokens_extented toks_ext =
  (* accumulated in reverse: the pending tokens first, then the token *)
  let rev_tokens =
    List.fold_left
      (fun acc ext -> ext.tok :: List.rev_append ext.new_tokens_before acc)
      [] toks_ext
  in
  acc_map mk_token_extended (List.rev rev_tokens)
107
108
109
ae4735db
C
(* View of the tokens grouped by parentheses.
 *
 * In [Parenthised (args, parens)], [args] is an x list list, because
 * the x list are separated by ','. As built by mk_parenthised, [parens]
 * holds the delimiters: the opening paren, then the commas, then the
 * closing paren when one was found.
 *)
type paren_grouped =
  | Parenthised of paren_grouped list list * token_extended list
  | PToken of token_extended
114
ae4735db
C
(* View of the tokens grouped by braces.
 *
 * [Braceised (body, obrace, cbrace)] carries the content of the block,
 * its opening brace and its closing brace; the latter is [None] when
 * mk_braceised reached the end of the tokens without finding it.
 *)
type brace_grouped =
  | Braceised of
      brace_grouped list list * token_extended * token_extended option
  | BToken of token_extended
119
(* View of the tokens grouped by #ifdef nesting.
 *
 * Far better data structure than doing hacks in the lexer or parser
 * because in the lexer we don't know to which ifdef an endif is related
 * and so when we want to comment an ifdef, we don't know which endif
 * we must also comment. Especially true for the #if 0 which sometimes
 * has a #else part.
 *
 * x list list, because the x list are separated by #else or #elif.
 * The token_extended list holds the directive tokens themselves; the
 * bool of [Ifdefbool] is the one carried by the opening directive token.
 *)
type ifdef_grouped =
  | Ifdef of ifdef_grouped list list * token_extended list
  | Ifdefbool of bool * ifdef_grouped list list * token_extended list
  | NotIfdefLine of token_extended list
132
133
(* The elements that mk_line_parenthised found on one source line. *)
type 'a line_grouped =
  | Line of 'a list
136
137
(* View separating the tokens found between a '{' and a '}' that are both
 * in column zero ([BodyFunction]) from everything else
 * ([NotBodyLine]). *)
type body_function_grouped =
  | BodyFunction of token_extended list
  | NotBodyLine of token_extended list
141
142
143(* ------------------------------------------------------------------------- *)
144(* view builders *)
145(* ------------------------------------------------------------------------- *)
146
(* Build the paren_grouped view of the token list [xs]: every opening
 * paren starts a [Parenthised] group, every other token becomes a
 * [PToken]. The order of the tokens is preserved.
 *
 * todo: synchro ! use more indentation
 * if paren not closed and same indentation level, certainly because
 * part of a mid-ifdef-expression.
 *)
let rec mk_parenthised xs =
  let rec loop acc = function
    | [] -> List.rev acc
    | x::xs ->
        (match x.tok with
        | TOPar _ | TOParDefine _ ->
            let body, extras, xs = mk_parameters [x] [] xs in
            loop (Parenthised (body, extras)::acc) xs
        | _ ->
            loop (PToken x::acc) xs
        )
  in
  loop [] xs

(* Return the body of the parenthised expression, its delimiter tokens
 * and the rest of the tokens.
 *
 * [extras] accumulates, in reverse, the delimiters seen so far (opening
 * paren, commas); [acc_before_sep] accumulates, in reverse, the elements
 * of the argument being read. The body has one element list per
 * comma-separated argument.
 *)
and mk_parameters extras acc_before_sep xs =
  match xs with
  | [] ->
      (* maybe because of #ifdef which "opens" '(' in 2 branches *)
      pr2 "PB: not found closing paren in fuzzy parsing";
      [List.rev acc_before_sep], List.rev extras, []
  | x::xs ->
      (match x.tok with
      (* synchro: an opening brace in column zero can not be inside a
       * paren, so give up on this group and hand the brace back to the
       * caller. The matched token is '{', hence the message. *)
      | TOBrace _ when x.col =|= 0 ->
          pr2 "PB: found synchro point { in paren";
          [List.rev acc_before_sep], List.rev extras, (x::xs)

      | TCPar _ | TCParEOL _ ->
          [List.rev acc_before_sep], List.rev (x::extras), xs
      | TOPar _ | TOParDefine _ ->
          (* nested group: read it fully, then go on with the current
           * argument *)
          let body, extrasnest, xs = mk_parameters [x] [] xs in
          mk_parameters extras
            (Parenthised (body, extrasnest)::acc_before_sep)
            xs
      | TComma _ ->
          (* close the current argument and start a new one *)
          let body, extras, xs = mk_parameters (x::extras) [] xs in
          (List.rev acc_before_sep)::body, extras, xs
      | _ ->
          mk_parameters extras (PToken x::acc_before_sep) xs
      )
191
192
193
194
(* Build the brace_grouped view of the token list [xs]: every '{' starts
 * a [Braceised] group, every other token becomes a [BToken]. A '}'
 * without a matching '{' is reported and kept as a plain [BToken]. *)
let rec mk_braceised xs =
  let rec group rev_groups toks =
    match toks with
    | [] -> List.rev rev_groups
    | ({tok = TOBrace _} as obrace)::rest ->
        let body, cbrace_opt, rest = mk_braceised_aux [] rest in
        group (Braceised (body, obrace, cbrace_opt)::rev_groups) rest
    | ({tok = TCBrace _} as stray)::rest ->
        pr2 "PB: found closing brace alone in fuzzy parsing";
        group (BToken stray::rev_groups) rest
    | other::rest ->
        group (BToken other::rev_groups) rest
  in
  group [] xs

(* Return the body of the braceised block, its closing brace if any, and
 * the rest of the tokens. [acc] accumulates the body in reverse. *)
and mk_braceised_aux acc xs =
  match xs with
  | [] ->
      (* maybe because of #ifdef which "opens" '{' in 2 branches *)
      pr2 "PB: not found closing brace in fuzzy parsing";
      [List.rev acc], None, []
  | ({tok = TCBrace _} as cbrace)::rest ->
      [List.rev acc], Some cbrace, rest
  | ({tok = TOBrace _} as obrace)::rest ->
      (* nested block: read it fully, then go on with the current one *)
      let inner, inner_end, rest = mk_braceised_aux [] rest in
      mk_braceised_aux (Braceised (inner, obrace, inner_end)::acc) rest
  | other::rest ->
      mk_braceised_aux (BToken other::acc) rest
226
708f4980
C
227
228
ae4735db
C
229
(* Build the ifdef_grouped view of the token list [xs].
 *
 * An opening directive starts an [Ifdef] or [Ifdefbool] group; any other
 * token is grouped with the following tokens of the same line into a
 * [NotIfdefLine].
 *
 * The top-level traversal uses an accumulator (like mk_parenthised and
 * mk_braceised) so that its stack use does not grow with the number of
 * lines of the file.
 *)
let rec mk_ifdef xs =
  let rec loop acc = function
    | [] -> List.rev acc
    | x::xs ->
        (match x.tok with
        | TIfdef _ ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            loop (Ifdef (body, extra)::acc) xs
        | TIfdefBool (b,_,_) ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in

            (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
            if !Flag_parsing_c.if0_passing
            then loop (Ifdefbool (b, body, extra)::acc) xs
            else loop (Ifdef (body, extra)::acc) xs

        | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
            let body, extra, xs = mk_ifdef_parameters [x] [] xs in
            loop (Ifdefbool (b, body, extra)::acc) xs

        | _ ->
            (* todo? can have some Ifdef in the line ? *)
            let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
            loop (NotIfdefLine line::acc) xs
        )
  in
  loop [] xs

(* Return the branches of the current ifdef, its directive tokens and the
 * rest of the tokens.
 *
 * [extras] accumulates, in reverse, the directive tokens seen so far
 * (the opening directive, then each #else/#elif); [acc_before_sep]
 * accumulates, in reverse, the content of the branch being read.
 *)
and mk_ifdef_parameters extras acc_before_sep xs =
  match xs with
  | [] ->
      (* Note that mk_ifdef is assuming that CPP instructions are alone
       * on their line. Because I do a span (fun x -> is_same_line ...)
       * I might take with me a #endif if this one is mixed on a line
       * with some "normal" tokens.
       *)
      pr2 "PB: not found closing ifdef in fuzzy parsing";
      [List.rev acc_before_sep], List.rev extras, []
  | x::xs ->
      (match x.tok with
      | TEndif _ ->
          [List.rev acc_before_sep], List.rev (x::extras), xs
      | TIfdef _ ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          mk_ifdef_parameters
            extras (Ifdef (body, extrasnest)::acc_before_sep) xs

      | TIfdefBool (b,_,_) ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in

          (* same rule as in mk_ifdef for #if 0 *)
          if !Flag_parsing_c.if0_passing
          then
            mk_ifdef_parameters
              extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs
          else
            mk_ifdef_parameters
              extras (Ifdef (body, extrasnest)::acc_before_sep) xs

      | TIfdefMisc (b,_,_) | TIfdefVersion (b,_,_) ->
          let body, extrasnest, xs = mk_ifdef_parameters [x] [] xs in
          mk_ifdef_parameters
            extras (Ifdefbool (b, body, extrasnest)::acc_before_sep) xs

      | TIfdefelse _
      | TIfdefelif _ ->
          (* close the current branch and start a new one *)
          let body, extras, xs = mk_ifdef_parameters (x::extras) [] xs in
          (List.rev acc_before_sep)::body, extras, xs
      | _ ->
          let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
          mk_ifdef_parameters extras (NotIfdefLine line::acc_before_sep) xs
      )
301
302(* --------------------------------------- *)
303
(* Line of a paren_grouped element: the line of the token for a
 * [PToken], the line of the first delimiter token for a [Parenthised].
 * Raises [Impossible 121] on a [Parenthised] without any delimiter. *)
let line_of_paren = function
  | PToken x -> x.line
  | Parenthised (_, first_paren::_) -> first_paren.line
  | Parenthised (_, []) -> raise (Impossible 121)
311
312
(* Split a paren_grouped list into its longest prefix of elements that
 * are on line [line] and the remaining elements. The prefix also stops
 * in front of the EOF token, whatever its line. *)
let rec span_line_paren line = function
  | [] -> [], []
  | (PToken t::_) as all when TH.is_eof t.tok -> [], all
  | x::rest when line_of_paren x =|= line ->
      let same_line, others = span_line_paren line rest in
      x::same_line, others
  | all -> [], all
708f4980 326
ae4735db
C
327
(* Group a paren_grouped list by source line: each [Line] holds one
 * element followed by the next elements that span_line_paren finds on
 * the same line.
 *
 * Written with an accumulator so that the stack use does not grow with
 * the number of lines of the file.
 *)
let mk_line_parenthised xs =
  let rec loop acc = function
    | [] -> List.rev acc
    | x::xs ->
        let line_no = line_of_paren x in
        let line, xs = span_line_paren line_no xs in
        loop (Line (x::line)::acc) xs
  in
  loop [] xs
335
336
337(* --------------------------------------- *)
ae4735db
C
(* Build the body_function_grouped view of the token list [xs].
 *
 * A '{' in column zero opens a body that extends up to the next '}' in
 * column zero; the tokens in between form a [BodyFunction] (the two
 * braces themselves are not kept). Any other token is grouped with the
 * following tokens of the same line into a [NotBodyLine]. When the
 * closing brace is missing, the remaining tokens are returned as a final
 * [NotBodyLine].
 *
 * Written with an accumulator so that the stack use does not grow with
 * the size of the file.
 *)
let mk_body_function_grouped xs =
  let is_closing_brace = function
    | {tok = TCBrace _; col = 0} -> true
    | _ -> false
  in
  let rec loop acc = function
    | [] -> List.rev acc
    | {tok = TOBrace _; col = 0}::xs ->
        let body, xs = Common.span (fun x -> not (is_closing_brace x)) xs in
        (match xs with
        | ({tok = TCBrace _; col = 0})::xs ->
            loop (BodyFunction body::acc) xs
        | [] ->
            pr2 "PB:not found closing brace in fuzzy parsing";
            List.rev (NotBodyLine body::acc)
        (* the span above can only stop on a closing brace *)
        | _ -> raise (Impossible 122)
        )
    | x::xs ->
        let line, xs = Common.span (fun y -> y.line =|= x.line) (x::xs) in
        loop (NotBodyLine line::acc) xs
  in
  loop [] xs
362
363
364(* ------------------------------------------------------------------------- *)
365(* view iterators *)
366(* ------------------------------------------------------------------------- *)
367
(* Apply [f] to every token of a paren_grouped list. Note that for a
 * [Parenthised] group the delimiter tokens are visited first, then the
 * content of the arguments, so this is not the source order (see
 * tokens_of_paren_ordered for that). *)
let rec iter_token_paren f xs =
  let visit = function
    | PToken tok -> f tok
    | Parenthised (args, parens) ->
        List.iter f parens;
        List.iter (iter_token_paren f) args
  in
  List.iter visit xs
375
(* Apply [f] to every token of a brace_grouped list. For a [Braceised]
 * group the opening brace and the closing brace (when present) are
 * visited first, then the content of the block. *)
let rec iter_token_brace f xs =
  let visit = function
    | BToken tok -> f tok
    | Braceised (bodies, obrace, cbrace_opt) ->
        f obrace;
        (match cbrace_opt with
        | Some cbrace -> f cbrace
        | None -> ()
        );
        List.iter (iter_token_brace f) bodies
  in
  List.iter visit xs
383
(* Apply [f] to every token of an ifdef_grouped list. For an ifdef group
 * the directive tokens are visited first, then the content of the
 * branches. *)
let rec iter_token_ifdef f xs =
  let visit = function
    | NotIfdefLine toks -> List.iter f toks
    | Ifdefbool (_, branches, directives)
    | Ifdef (branches, directives) ->
        List.iter f directives;
        List.iter (iter_token_ifdef f) branches
  in
  List.iter visit xs
392
393
394
395
(* All the tokens of a paren_grouped list, in the order in which
 * iter_token_paren visits them (delimiters of a group before its
 * content). *)
let tokens_of_paren xs =
  let rev_toks = ref [] in
  iter_token_paren (fun tok -> rev_toks := tok :: !rev_toks) xs;
  List.rev !rev_toks
400
401
(* All the tokens of a paren_grouped list, in source order: for a
 * [Parenthised] group, the opening paren, then the arguments interleaved
 * with their commas, then the closing paren.
 *
 * mk_parenthised can build a group without closing paren (end of the
 * tokens reached, or synchro point found). Such a group used to raise
 * Impossible; its tokens are now returned as they are, without closing
 * paren.
 *
 * Raises [Impossible 124] on a group without any delimiter and
 * [Impossible 125] when arguments and commas do not match up.
 *)
let tokens_of_paren_ordered xs =
  let g = ref [] in

  let is_closing_paren x =
    match x.tok with
    | TCPar _ | TCParEOL _ -> true
    | _ -> false
  in

  let rec aux_tokens_ordered = function
    | PToken tok -> push2 tok g
    | Parenthised (xxs, info_parens) ->
        let (opar, commas, cpar_opt) =
          match info_parens with
          | [] -> raise (Impossible 124)
          | opar::rest ->
              (match List.rev rest with
              | last::rev_commas when is_closing_paren last ->
                  opar, List.rev rev_commas, Some last
              | _ ->
                  (* unclosed group: everything after the opening paren
                   * is a comma *)
                  opar, rest, None
              )
        in
        push2 opar g;
        aux_args (xxs, commas);
        (match cpar_opt with
        | Some cpar -> push2 cpar g
        | None -> ()
        )

  and aux_args (xxs, commas) =
    match xxs, commas with
    | [], [] -> ()
    | [xs], [] -> xs +> List.iter aux_tokens_ordered
    | xs::ys::xxs, comma::commas ->
        xs +> List.iter aux_tokens_ordered;
        push2 comma g;
        aux_args (ys::xxs, commas)
    | _ -> raise (Impossible 125)

  in

  xs +> List.iter aux_tokens_ordered;
  List.rev !g
436
437
438
439(* ------------------------------------------------------------------------- *)
440(* set the context info in token *)
441(* ------------------------------------------------------------------------- *)
442
443
ae4735db
C
(* Tag as [InFunction] the tokens found inside what looks like a function
 * body, at the top level of the brace_grouped list [xs].
 *
 * could try: ) { } but it can be the ) of a if or while, so
 * better to base the heuristic on the position in column zero.
 * Note that some struct or enum or init put also their { in first column
 * but set_in_other will overwrite the previous InFunction tag.
 *)
let rec set_in_function_tag xs =
  let tag_as_function body =
    List.iter (iter_token_brace (fun tok -> tok.where <- InFunction)) body
  in
  match xs with
  | [] -> ()

  (* ) { and the closing } is in column zero, then certainly a function *)
  | BToken {tok = TCPar _}::Braceised (body, obrace, Some cbrace)::rest
    when obrace.col <> 0 && cbrace.col =|= 0 ->
      tag_as_function body;
      set_in_function_tag rest

  | BToken _::rest -> set_in_function_tag rest

  (* both braces in column zero *)
  | Braceised (body, obrace, Some cbrace)::rest
    when obrace.col =|= 0 && cbrace.col =|= 0 ->
      tag_as_function body;
      set_in_function_tag rest

  (* any other block is skipped, without looking inside *)
  | Braceised _::rest -> set_in_function_tag rest
708f4980 470
ae4735db
C
471
(* Tag the tokens found inside the body of an enum, of a named struct or
 * of an initializer as [InEnum], [InStruct] or [InInitializer]. Blocks
 * that match none of these patterns are explored recursively. *)
let rec set_in_other xs =
  let tag_body ctx body =
    List.iter (iter_token_brace (fun tok -> tok.where <- ctx)) body
  in
  match xs with
  | [] -> ()

  (* enum x { }   or   enum { } *)
  | BToken {tok = Tenum _}::BToken {tok = TIdent _}
    ::Braceised (body, _, _)::rest
  | BToken {tok = Tenum _}
    ::Braceised (body, _, _)::rest ->
      tag_body InEnum body;
      set_in_other rest

  (* struct x { } *)
  | BToken {tok = Tstruct _}::BToken {tok = TIdent _}
    ::Braceised (body, _, _)::rest ->
      tag_body InStruct body;
      set_in_other rest

  (* = { } *)
  | BToken {tok = TEq _}::Braceised (body, _, _)::rest ->
      tag_body InInitializer body;
      set_in_other rest

  | BToken _::rest -> set_in_other rest

  (* any other block: look for enum/struct/initializer inside it *)
  | Braceised (body, _, _)::rest ->
      List.iter set_in_other body;
      set_in_other rest
506
708f4980 507
ae4735db
C
508
509
(* Set the [where] field of the tokens of the brace_grouped list [xs].
 * The order matters: set_in_other runs last so that its tags replace the
 * InFunction ones set by set_in_function_tag. *)
let set_context_tag xs =
  set_in_function_tag xs;
  set_in_other xs
ae4735db 515