3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
17 module TH
= Token_helpers
21 (*****************************************************************************)
22 (* Some debugging functions *)
23 (*****************************************************************************)
25 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_parsing
27 (* ------------------------------------------------------------------------- *)
28 (* fuzzy parsing, different "views" over the same program *)
29 (* ------------------------------------------------------------------------- *)
32 (* Normally I should not use ref/mutable in the token_extended type
33 * and I should have a set of functions taking a list of tokens and
34 * returning a list of tokens. The problem is that to make easier some
35 * functions, it is better to work on better representation, on "views"
36 * over this list of tokens. But then modifying those views and get
37 * back from those views to the original simple list of tokens is
38 * tedious. One way is to maintain next to the view a list of "actions"
39 * (I was using a hash storing the charpos of the token and associating
40 * the action) but it is tedious too. Simpler to use mutable/ref. We
41 * use the same idea that we use when working on the Ast_c. *)
43 (* old: when I was using the list of "actions" next to the views, the hash
44 * indexed by the charpos, there could have been some problems:
45 * how my fake_pos interact with the way I tag and adjust token ?
46 * because I base my tagging on the position of the token ! so sometimes
47 * could tag another fakeInfo that should not be tagged ?
48 * fortunately I don't use anymore this technique.
51 (* update: quite close to the Place_c.Inxxx *)
53 InFunction
| InEnum
| InStruct
| InInitializer
| NoContext
55 type token_extended
= {
56 mutable tok
: Parser_c.token
;
57 mutable where
: context
;
59 (* less: need also a after ? *)
60 mutable new_tokens_before
: Parser_c.token list
;
62 (* line x col cache, more easily accessible, of the info in the token *)
67 (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
68 * can confuse the parser.
70 let set_as_comment cppkind x
=
72 then () (* otherwise parse_c will be lost if don't find a EOF token *)
74 x
.tok
<- TCommentCpp
(cppkind
, TH.info_of_tok x
.tok
)
76 let mk_token_extended x
=
77 let (line
, col
) = TH.linecol_of_tok x
in
79 line
= line
; col
= col
;
81 new_tokens_before
= [];
85 let rebuild_tokens_extented toks_ext
=
86 let _tokens = ref [] in
87 toks_ext
+> List.iter
(fun tok
->
88 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
91 let tokens = List.rev
!_tokens in
92 (tokens +> acc_map
mk_token_extended)
96 (* x list list, because x list separated by ',' *)
98 | Parenthised
of paren_grouped list list
* token_extended list
99 | PToken
of token_extended
103 brace_grouped list list
* token_extended
* token_extended
option
104 | BToken
of token_extended
106 (* Far better data structure than doing hacks in the lexer or parser
107 * because in lexer we don't know to which ifdef a endif is related
108 * and so when we want to comment a ifdef, we don't know which endif
109 * we must also comment. Especially true for the #if 0 which sometimes
112 * x list list, because x list separated by #else or #elif
115 | Ifdef
of ifdef_grouped list list
* token_extended list
116 | Ifdefbool
of bool * ifdef_grouped list list
* token_extended list
117 | NotIfdefLine
of token_extended list
120 type 'a line_grouped
=
124 type body_function_grouped
=
125 | BodyFunction
of token_extended list
126 | NotBodyLine
of token_extended list
129 (* ------------------------------------------------------------------------- *)
131 (* ------------------------------------------------------------------------- *)
133 (* todo: synchro ! use more indentation
134 * if paren not closed and same indentation level, certainly because
135 * part of a mid-ifdef-expression.
137 let rec mk_parenthised xs
=
138 let rec loop acc
= function
142 | TOPar _
| TOParDefine _
->
143 let body, extras
, xs
= mk_parameters
[x
] [] xs
in
144 loop (Parenthised
(body,extras
)::acc
) xs
146 loop (PToken x
::acc
) xs
150 (* return the body of the parenthised expression and the rest of the tokens *)
151 and mk_parameters extras acc_before_sep xs
=
154 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
155 pr2 "PB: not found closing paren in fuzzy parsing";
156 [List.rev acc_before_sep
], List.rev extras
, []
160 | TOBrace _
when x
.col
=|= 0 ->
161 pr2 "PB: found synchro point } in paren";
162 [List.rev acc_before_sep
], List.rev
(extras
), (x
::xs
)
164 | TCPar _
| TCParEOL _
->
165 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
166 | TOPar _
| TOParDefine _
->
167 let body, extrasnest
, xs
= mk_parameters
[x
] [] xs
in
169 (Parenthised
(body,extrasnest
)::acc_before_sep
)
172 let body, extras
, xs
= mk_parameters
(x
::extras
) [] xs
in
173 (List.rev acc_before_sep
)::body, extras
, xs
175 mk_parameters extras
(PToken x
::acc_before_sep
) xs
181 let rec mk_braceised xs
=
182 let rec loop acc
= function
187 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
188 loop (Braceised
(body, x
, endbrace
)::acc
) xs
190 pr2 "PB: found closing brace alone in fuzzy parsing";
191 loop (BToken x
::acc
) xs
193 loop (BToken x
::acc
) xs
) in
196 (* return the body of the parenthised expression and the rest of the tokens *)
197 and mk_braceised_aux acc xs
=
200 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
201 pr2 "PB: not found closing brace in fuzzy parsing";
202 [List.rev acc
], None
, []
205 | TCBrace _
-> [List.rev acc
], Some x
, xs
207 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
208 mk_braceised_aux
(Braceised
(body,x
, endbrace
)::acc
) xs
210 mk_braceised_aux
(BToken x
::acc
) xs
216 let rec mk_ifdef xs
=
222 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
223 Ifdef
(body, extra
)::mk_ifdef xs
224 | TIfdefBool
(b
,_
, _
) ->
225 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
227 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
228 if !Flag_parsing_c.if0_passing
229 then Ifdefbool
(b
, body, extra
)::mk_ifdef xs
230 else Ifdef
(body, extra
)::mk_ifdef xs
232 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
233 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
234 Ifdefbool
(b
, body, extra
)::mk_ifdef xs
238 (* todo? can have some Ifdef in the line ? *)
239 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
240 NotIfdefLine
line::mk_ifdef xs
243 and mk_ifdef_parameters extras acc_before_sep xs
=
246 (* Note that mk_ifdef is assuming that CPP instruction are alone
247 * on their line. Because I do a span (fun x -> is_same_line ...)
248 * I might take with me a #endif if this one is mixed on a line
249 * with some "normal" tokens.
251 pr2 "PB: not found closing ifdef in fuzzy parsing";
252 [List.rev acc_before_sep
], List.rev extras
, []
256 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
258 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
260 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
262 | TIfdefBool
(b
,_
,_
) ->
263 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
265 if !Flag_parsing_c.if0_passing
268 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
271 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
274 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
275 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
277 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
281 let body, extras
, xs
= mk_ifdef_parameters
(x
::extras
) [] xs
in
282 (List.rev acc_before_sep
)::body, extras
, xs
284 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
285 mk_ifdef_parameters extras
(NotIfdefLine
line::acc_before_sep
) xs
288 (* --------------------------------------- *)
290 let line_of_paren = function
292 | Parenthised
(xxs
, info_parens
) ->
293 (match info_parens
with
294 | [] -> raise Impossible
299 let rec span_line_paren line = function
303 | PToken tok
when TH.is_eof tok
.tok
->
306 if line_of_paren x
=|= line
308 let (l1
, l2
) = span_line_paren line xs
in
314 let rec mk_line_parenthised xs
=
318 let line_no = line_of_paren x
in
319 let line, xs
= span_line_paren line_no xs
in
320 Line
(x
::line)::mk_line_parenthised xs
323 (* --------------------------------------- *)
324 let rec mk_body_function_grouped xs
=
329 | {tok
= TOBrace _
; col
= 0} ->
330 let is_closing_brace = function
331 | {tok
= TCBrace _
; col
= 0 } -> true
334 let body, xs
= Common.span
(fun x
-> not
(is_closing_brace x
)) xs
in
336 | ({tok
= TCBrace _
; col
= 0 })::xs
->
337 BodyFunction
body::mk_body_function_grouped xs
339 pr2 "PB:not found closing brace in fuzzy parsing";
341 | _
-> raise Impossible
345 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
346 NotBodyLine
line::mk_body_function_grouped xs
350 (* ------------------------------------------------------------------------- *)
352 (* ------------------------------------------------------------------------- *)
354 let rec iter_token_paren f xs
=
355 xs
+> List.iter
(function
356 | PToken tok
-> f tok
;
357 | Parenthised
(xxs
, info_parens
) ->
358 info_parens
+> List.iter f
;
359 xxs
+> List.iter
(fun xs
-> iter_token_paren f xs
)
362 let rec iter_token_brace f xs
=
363 xs
+> List.iter
(function
364 | BToken tok
-> f tok
;
365 | Braceised
(xxs
, tok1
, tok2opt
) ->
366 f tok1
; do_option f tok2opt
;
367 xxs
+> List.iter
(fun xs
-> iter_token_brace f xs
)
370 let rec iter_token_ifdef f xs
=
371 xs
+> List.iter
(function
372 | NotIfdefLine xs
-> xs
+> List.iter f
;
373 | Ifdefbool
(_
, xxs
, info_ifdef
)
374 | Ifdef
(xxs
, info_ifdef
) ->
375 info_ifdef
+> List.iter f
;
376 xxs
+> List.iter
(iter_token_ifdef f
)
382 let tokens_of_paren xs
=
384 xs
+> iter_token_paren (fun tok
-> push2 tok
g);
388 let tokens_of_paren_ordered xs
=
391 let rec aux_tokens_ordered = function
392 | PToken tok
-> push2 tok
g;
393 | Parenthised
(xxs
, info_parens
) ->
394 let (opar
, cpar
, commas
) =
395 match info_parens
with
397 (match List.rev xs
with
399 opar
, cpar
, List.rev xs
400 | _
-> raise Impossible
402 | _
-> raise Impossible
405 aux_args
(xxs
,commas
);
408 and aux_args
(xxs
, commas
) =
409 match xxs
, commas
with
411 | [xs
], [] -> xs
+> List.iter
aux_tokens_ordered
412 | xs
::ys
::xxs
, comma
::commas
->
413 xs
+> List.iter
aux_tokens_ordered;
415 aux_args
(ys
::xxs
, commas
)
416 | _
-> raise Impossible
420 xs
+> List.iter
aux_tokens_ordered;
425 (* ------------------------------------------------------------------------- *)
426 (* set the context info in token *)
427 (* ------------------------------------------------------------------------- *)
430 let rec set_in_function_tag xs
=
431 (* could try: ) { } but it can be the ) of a if or while, so
432 * better to base the heuristic on the position in column zero.
433 * Note that some struct or enum or init put also their { in first column
434 * but set_in_other will overwrite the previous InFunction tag.
438 (* ) { and the closing } is in column zero, then certainly a function *)
439 | BToken
({tok
= TCPar _
})::(Braceised
(body, tok1
, Some tok2
))::xs
440 when tok1
.col
<> 0 && tok2
.col
=|= 0 ->
441 body +> List.iter
(iter_token_brace (fun tok
->
442 tok
.where
<- InFunction
444 set_in_function_tag xs
446 | (BToken x
)::xs
-> set_in_function_tag xs
448 | (Braceised
(body, tok1
, Some tok2
))::xs
449 when tok1
.col
=|= 0 && tok2
.col
=|= 0 ->
450 body +> List.iter
(iter_token_brace (fun tok
->
451 tok
.where
<- InFunction
453 set_in_function_tag xs
454 | Braceised
(body, tok1
, tok2
)::xs
->
455 set_in_function_tag xs
458 let rec set_in_other xs
=
462 | BToken
({tok
= Tenum _
})::BToken
({tok
= TIdent _
})
463 ::Braceised
(body, tok1
, tok2
)::xs
464 | BToken
({tok
= Tenum _
})
465 ::Braceised
(body, tok1
, tok2
)::xs
467 body +> List.iter
(iter_token_brace (fun tok
->
473 | BToken
({tok
= Tstruct _
})::BToken
({tok
= TIdent _
})
474 ::Braceised
(body, tok1
, tok2
)::xs
->
475 body +> List.iter
(iter_token_brace (fun tok
->
476 tok
.where
<- InStruct
;
480 | BToken
({tok
= TEq _
})
481 ::Braceised
(body, tok1
, tok2
)::xs
->
482 body +> List.iter
(iter_token_brace (fun tok
->
483 tok
.where
<- InInitializer
;
487 | BToken _
::xs
-> set_in_other xs
489 | Braceised
(body, tok1
, tok2
)::xs
->
490 body +> List.iter
set_in_other;
496 let set_context_tag xs
=
498 set_in_function_tag xs
;