3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
17 module TH
= Token_helpers
21 (*****************************************************************************)
22 (* Some debugging functions *)
23 (*****************************************************************************)
25 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_parsing
27 (* ------------------------------------------------------------------------- *)
28 (* fuzzy parsing, different "views" over the same program *)
29 (* ------------------------------------------------------------------------- *)
32 (* Normally I should not use ref/mutable in the token_extended type
33 * and I should have a set of functions taking a list of tokens and
34 * returning a list of tokens. The problem is that to make easier some
35 * functions, it is better to work on better representation, on "views"
36 * over this list of tokens. But then modifying those views and get
37 * back from those views to the original simple list of tokens is
38 * tedious. One way is to maintain next to the view a list of "actions"
39 * (I was using a hash storing the charpos of the token and associating
40 * the action) but it is tedious too. Simpler to use mutable/ref. We
41 * use the same idea that we use when working on the Ast_c. *)
43 (* old: when I was using the list of "actions" next to the views, the hash
44 * indexed by the charpos, there could have been some problems:
45 * how my fake_pos interact with the way I tag and adjust token ?
46 * because I base my tagging on the position of the token ! so sometimes
47 * could tag another fakeInfo that should not be tagged ?
48 * fortunately I don't use anymore this technique.
51 (* update: quite close to the Place_c.Inxxx *)
53 InFunction
| InEnum
| InStruct
| InInitializer
| NoContext
55 type token_extended
= {
56 mutable tok
: Parser_c.token
;
57 mutable where
: context
;
59 (* less: need also a after ? *)
60 mutable new_tokens_before
: Parser_c.token list
;
62 (* line x col cache, more easily accessible, of the info in the token *)
67 (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
68 * can confuse the parser.
70 let set_as_comment cppkind x
=
72 then () (* otherwise parse_c will be lost if don't find a EOF token *)
74 x
.tok
<- TCommentCpp
(cppkind
, TH.info_of_tok x
.tok
)
76 let mk_token_extended x
=
77 let (line
, col
) = TH.linecol_of_tok x
in
79 line
= line
; col
= col
;
81 new_tokens_before
= [];
85 let rebuild_tokens_extented toks_ext
=
86 let _tokens = ref [] in
87 toks_ext
+> List.iter
(fun tok
->
88 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
91 let tokens = List.rev
!_tokens in
92 (tokens +> acc_map
mk_token_extended)
96 (* x list list, because x list separated by ',' *)
98 | Parenthised
of paren_grouped list list
* token_extended list
99 | PToken
of token_extended
103 brace_grouped list list
* token_extended
* token_extended
option
104 | BToken
of token_extended
106 (* Far better data structure than doing hacks in the lexer or parser
107 * because in lexer we don't know to which ifdef a endif is related
108 * and so when we want to comment a ifdef, we don't know which endif
109 * we must also comment. Especially true for the #if 0 which sometimes
112 * x list list, because x list separated by #else or #elif
115 | Ifdef
of ifdef_grouped list list
* token_extended list
116 | Ifdefbool
of bool * ifdef_grouped list list
* token_extended list
117 | NotIfdefLine
of token_extended list
120 type 'a line_grouped
=
124 type body_function_grouped
=
125 | BodyFunction
of token_extended list
126 | NotBodyLine
of token_extended list
129 (* ------------------------------------------------------------------------- *)
131 (* ------------------------------------------------------------------------- *)
133 (* todo: synchro ! use more indentation
134 * if paren not closed and same indentation level, certainly because
135 * part of a mid-ifdef-expression.
137 let rec mk_parenthised xs
=
142 | TOPar _
| TOParDefine _
->
143 let body, extras
, xs
= mk_parameters
[x
] [] xs
in
144 Parenthised
(body,extras
)::mk_parenthised xs
146 PToken x
::mk_parenthised xs
149 (* return the body of the parenthised expression and the rest of the tokens *)
150 and mk_parameters extras acc_before_sep xs
=
153 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
154 pr2 "PB: not found closing paren in fuzzy parsing";
155 [List.rev acc_before_sep
], List.rev extras
, []
159 | TOBrace _
when x
.col
=|= 0 ->
160 pr2 "PB: found synchro point } in paren";
161 [List.rev acc_before_sep
], List.rev
(extras
), (x
::xs
)
163 | TCPar _
| TCParEOL _
->
164 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
165 | TOPar _
| TOParDefine _
->
166 let body, extrasnest
, xs
= mk_parameters
[x
] [] xs
in
168 (Parenthised
(body,extrasnest
)::acc_before_sep
)
171 let body, extras
, xs
= mk_parameters
(x
::extras
) [] xs
in
172 (List.rev acc_before_sep
)::body, extras
, xs
174 mk_parameters extras
(PToken x
::acc_before_sep
) xs
180 let rec mk_braceised xs
=
186 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
187 Braceised
(body, x
, endbrace
)::mk_braceised xs
189 pr2 "PB: found closing brace alone in fuzzy parsing";
190 BToken x
::mk_braceised xs
192 BToken x
::mk_braceised xs
195 (* return the body of the parenthised expression and the rest of the tokens *)
196 and mk_braceised_aux acc xs
=
199 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
200 pr2 "PB: not found closing brace in fuzzy parsing";
201 [List.rev acc
], None
, []
204 | TCBrace _
-> [List.rev acc
], Some x
, xs
206 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
207 mk_braceised_aux
(Braceised
(body,x
, endbrace
)::acc
) xs
209 mk_braceised_aux
(BToken x
::acc
) xs
215 let rec mk_ifdef xs
=
221 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
222 Ifdef
(body, extra
)::mk_ifdef xs
223 | TIfdefBool
(b
,_
, _
) ->
224 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
226 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
227 if !Flag_parsing_c.if0_passing
228 then Ifdefbool
(b
, body, extra
)::mk_ifdef xs
229 else Ifdef
(body, extra
)::mk_ifdef xs
231 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
232 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
233 Ifdefbool
(b
, body, extra
)::mk_ifdef xs
237 (* todo? can have some Ifdef in the line ? *)
238 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
239 NotIfdefLine
line::mk_ifdef xs
242 and mk_ifdef_parameters extras acc_before_sep xs
=
245 (* Note that mk_ifdef is assuming that CPP instruction are alone
246 * on their line. Because I do a span (fun x -> is_same_line ...)
247 * I might take with me a #endif if this one is mixed on a line
248 * with some "normal" tokens.
250 pr2 "PB: not found closing ifdef in fuzzy parsing";
251 [List.rev acc_before_sep
], List.rev extras
, []
255 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
257 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
259 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
261 | TIfdefBool
(b
,_
,_
) ->
262 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
264 if !Flag_parsing_c.if0_passing
267 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
270 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
273 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
274 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
276 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
280 let body, extras
, xs
= mk_ifdef_parameters
(x
::extras
) [] xs
in
281 (List.rev acc_before_sep
)::body, extras
, xs
283 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
284 mk_ifdef_parameters extras
(NotIfdefLine
line::acc_before_sep
) xs
287 (* --------------------------------------- *)
289 let line_of_paren = function
291 | Parenthised
(xxs
, info_parens
) ->
292 (match info_parens
with
293 | [] -> raise Impossible
298 let rec span_line_paren line = function
302 | PToken tok
when TH.is_eof tok
.tok
->
305 if line_of_paren x
=|= line
307 let (l1
, l2
) = span_line_paren line xs
in
313 let rec mk_line_parenthised xs
=
317 let line_no = line_of_paren x
in
318 let line, xs
= span_line_paren line_no xs
in
319 Line
(x
::line)::mk_line_parenthised xs
322 (* --------------------------------------- *)
323 let rec mk_body_function_grouped xs
=
328 | {tok
= TOBrace _
; col
= 0} ->
329 let is_closing_brace = function
330 | {tok
= TCBrace _
; col
= 0 } -> true
333 let body, xs
= Common.span
(fun x
-> not
(is_closing_brace x
)) xs
in
335 | ({tok
= TCBrace _
; col
= 0 })::xs
->
336 BodyFunction
body::mk_body_function_grouped xs
338 pr2 "PB:not found closing brace in fuzzy parsing";
340 | _
-> raise Impossible
344 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
345 NotBodyLine
line::mk_body_function_grouped xs
349 (* ------------------------------------------------------------------------- *)
351 (* ------------------------------------------------------------------------- *)
353 let rec iter_token_paren f xs
=
354 xs
+> List.iter
(function
355 | PToken tok
-> f tok
;
356 | Parenthised
(xxs
, info_parens
) ->
357 info_parens
+> List.iter f
;
358 xxs
+> List.iter
(fun xs
-> iter_token_paren f xs
)
361 let rec iter_token_brace f xs
=
362 xs
+> List.iter
(function
363 | BToken tok
-> f tok
;
364 | Braceised
(xxs
, tok1
, tok2opt
) ->
365 f tok1
; do_option f tok2opt
;
366 xxs
+> List.iter
(fun xs
-> iter_token_brace f xs
)
369 let rec iter_token_ifdef f xs
=
370 xs
+> List.iter
(function
371 | NotIfdefLine xs
-> xs
+> List.iter f
;
372 | Ifdefbool
(_
, xxs
, info_ifdef
)
373 | Ifdef
(xxs
, info_ifdef
) ->
374 info_ifdef
+> List.iter f
;
375 xxs
+> List.iter
(iter_token_ifdef f
)
381 let tokens_of_paren xs
=
383 xs
+> iter_token_paren (fun tok
-> push2 tok
g);
387 let tokens_of_paren_ordered xs
=
390 let rec aux_tokens_ordered = function
391 | PToken tok
-> push2 tok
g;
392 | Parenthised
(xxs
, info_parens
) ->
393 let (opar
, cpar
, commas
) =
394 match info_parens
with
396 (match List.rev xs
with
398 opar
, cpar
, List.rev xs
399 | _
-> raise Impossible
401 | _
-> raise Impossible
404 aux_args
(xxs
,commas
);
407 and aux_args
(xxs
, commas
) =
408 match xxs
, commas
with
410 | [xs
], [] -> xs
+> List.iter
aux_tokens_ordered
411 | xs
::ys
::xxs
, comma
::commas
->
412 xs
+> List.iter
aux_tokens_ordered;
414 aux_args
(ys
::xxs
, commas
)
415 | _
-> raise Impossible
419 xs
+> List.iter
aux_tokens_ordered;
424 (* ------------------------------------------------------------------------- *)
425 (* set the context info in token *)
426 (* ------------------------------------------------------------------------- *)
429 let rec set_in_function_tag xs
=
430 (* could try: ) { } but it can be the ) of a if or while, so
431 * better to base the heuristic on the position in column zero.
432 * Note that some struct or enum or init put also their { in first column
433 * but set_in_other will overwrite the previous InFunction tag.
437 (* ) { and the closing } is in column zero, then certainly a function *)
438 | BToken
({tok
= TCPar _
})::(Braceised
(body, tok1
, Some tok2
))::xs
439 when tok1
.col
<> 0 && tok2
.col
=|= 0 ->
440 body +> List.iter
(iter_token_brace (fun tok
->
441 tok
.where
<- InFunction
443 set_in_function_tag xs
445 | (BToken x
)::xs
-> set_in_function_tag xs
447 | (Braceised
(body, tok1
, Some tok2
))::xs
448 when tok1
.col
=|= 0 && tok2
.col
=|= 0 ->
449 body +> List.iter
(iter_token_brace (fun tok
->
450 tok
.where
<- InFunction
452 set_in_function_tag xs
453 | Braceised
(body, tok1
, tok2
)::xs
->
454 set_in_function_tag xs
457 let rec set_in_other xs
=
461 | BToken
({tok
= Tenum _
})::BToken
({tok
= TIdent _
})
462 ::Braceised
(body, tok1
, tok2
)::xs
463 | BToken
({tok
= Tenum _
})
464 ::Braceised
(body, tok1
, tok2
)::xs
466 body +> List.iter
(iter_token_brace (fun tok
->
472 | BToken
({tok
= Tstruct _
})::BToken
({tok
= TIdent _
})
473 ::Braceised
(body, tok1
, tok2
)::xs
->
474 body +> List.iter
(iter_token_brace (fun tok
->
475 tok
.where
<- InStruct
;
479 | BToken
({tok
= TEq _
})
480 ::Braceised
(body, tok1
, tok2
)::xs
->
481 body +> List.iter
(iter_token_brace (fun tok
->
482 tok
.where
<- InInitializer
;
486 | BToken _
::xs
-> set_in_other xs
488 | Braceised
(body, tok1
, tok2
)::xs
->
489 body +> List.iter
set_in_other;
495 let set_context_tag xs
=
497 set_in_function_tag xs
;