3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
18 module TH
= Token_helpers
22 (*****************************************************************************)
23 (* Some debugging functions *)
24 (*****************************************************************************)
26 let pr2, pr2_once
= Common.mk_pr2_wrappers
Flag_parsing_c.verbose_parsing
28 (* ------------------------------------------------------------------------- *)
29 (* fuzzy parsing, different "views" over the same program *)
30 (* ------------------------------------------------------------------------- *)
33 (* Normally I should not use ref/mutable in the token_extended type
34 * and I should have a set of functions taking a list of tokens and
35 * returning a list of tokens. The problem is that to make easier some
36 * functions, it is better to work on better representation, on "views"
37 * over this list of tokens. But then modifying those views and get
38 * back from those views to the original simple list of tokens is
39 * tedious. One way is to maintain next to the view a list of "actions"
40 * (I was using a hash storing the charpos of the token and associating
41 * the action) but it is tedious too. Simpler to use mutable/ref. We
42 * use the same idea that we use when working on the Ast_c. *)
44 (* old: when I was using the list of "actions" next to the views, the hash
45 * indexed by the charpos, there could have been some problems:
46 * how my fake_pos interact with the way I tag and adjust token ?
47 * because I base my tagging on the position of the token ! so sometimes
48 * could tag another fakeInfo that should not be tagged ?
49 * fortunately I don't use anymore this technique.
52 (* update: quite close to the Place_c.Inxxx *)
54 InFunction
| InEnum
| InStruct
| InInitializer
| NoContext
56 type token_extended
= {
57 mutable tok
: Parser_c.token
;
58 mutable where
: context
;
60 (* less: need also a after ? *)
61 mutable new_tokens_before
: Parser_c.token list
;
63 (* line x col cache, more easily accessible, of the info in the token *)
68 (* todo? is it ok to reset as a comment a TDefEOL ? if do that, then
69 * can confuse the parser.
71 let set_as_comment cppkind x
=
73 then () (* otherwise parse_c will be lost if don't find a EOF token *)
75 x
.tok
<- TCommentCpp
(cppkind
, TH.info_of_tok x
.tok
)
77 let save_as_comment cppkind x
=
79 then () (* otherwise parse_c will be lost if don't find a EOF token *)
83 TIfdef _
| TIfdefMisc _
| TIfdefVersion _
-> Token_c.IfDef
84 | TIfdefBool _
-> Token_c.IfDef0
85 | TIfdefelse _
| TIfdefelif _
-> Token_c.Else
86 | TEndif _
-> Token_c.Endif
87 | _
-> Token_c.Other
in
88 x
.tok
<- TCommentCpp
(cppkind
t, TH.info_of_tok x
.tok
)
90 let mk_token_extended x
=
91 let (line
, col
) = TH.linecol_of_tok x
in
93 line
= line
; col
= col
;
95 new_tokens_before
= [];
99 let rebuild_tokens_extented toks_ext
=
100 let _tokens = ref [] in
101 toks_ext
+> List.iter
(fun tok
->
102 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
103 push2 tok
.tok
_tokens
105 let tokens = List.rev
!_tokens in
106 (tokens +> acc_map
mk_token_extended)
110 (* x list list, because x list separated by ',' *)
112 | Parenthised
of paren_grouped list list
* token_extended list
113 | PToken
of token_extended
117 brace_grouped list list
* token_extended
* token_extended
option
118 | BToken
of token_extended
120 (* Far better data structure than doing hacks in the lexer or parser
121 * because in lexer we don't know to which ifdef a endif is related
122 * and so when we want to comment a ifdef, we don't know which endif
123 * we must also comment. Especially true for the #if 0 which sometimes
126 * x list list, because x list separated by #else or #elif
129 | Ifdef
of ifdef_grouped list list
* token_extended list
130 | Ifdefbool
of bool * ifdef_grouped list list
* token_extended list
131 | NotIfdefLine
of token_extended list
134 type 'a line_grouped
=
138 type body_function_grouped
=
139 | BodyFunction
of token_extended list
140 | NotBodyLine
of token_extended list
143 (* ------------------------------------------------------------------------- *)
145 (* ------------------------------------------------------------------------- *)
147 (* todo: synchro ! use more indentation
148 * if paren not closed and same indentation level, certainly because
149 * part of a mid-ifdef-expression.
151 let rec mk_parenthised xs
=
152 let rec loop acc
= function
156 | TOPar _
| TOParDefine _
->
157 let body, extras
, xs
= mk_parameters
[x
] [] xs
in
158 loop (Parenthised
(body,extras
)::acc
) xs
160 loop (PToken x
::acc
) xs
164 (* return the body of the parenthised expression and the rest of the tokens *)
165 and mk_parameters extras acc_before_sep xs
=
168 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
169 pr2 "PB: not found closing paren in fuzzy parsing";
170 [List.rev acc_before_sep
], List.rev extras
, []
174 | TOBrace _
when x
.col
=|= 0 ->
175 pr2 "PB: found synchro point } in paren";
176 [List.rev acc_before_sep
], List.rev
(extras
), (x
::xs
)
178 | TCPar _
| TCParEOL _
->
179 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
180 | TOPar _
| TOParDefine _
->
181 let body, extrasnest
, xs
= mk_parameters
[x
] [] xs
in
183 (Parenthised
(body,extrasnest
)::acc_before_sep
)
186 let body, extras
, xs
= mk_parameters
(x
::extras
) [] xs
in
187 (List.rev acc_before_sep
)::body, extras
, xs
189 mk_parameters extras
(PToken x
::acc_before_sep
) xs
195 let rec mk_braceised xs
=
196 let rec loop acc
= function
201 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
202 loop (Braceised
(body, x
, endbrace
)::acc
) xs
204 pr2 "PB: found closing brace alone in fuzzy parsing";
205 loop (BToken x
::acc
) xs
207 loop (BToken x
::acc
) xs
) in
210 (* return the body of the parenthised expression and the rest of the tokens *)
211 and mk_braceised_aux acc xs
=
214 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
215 pr2 "PB: not found closing brace in fuzzy parsing";
216 [List.rev acc
], None
, []
219 | TCBrace _
-> [List.rev acc
], Some x
, xs
221 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
222 mk_braceised_aux
(Braceised
(body,x
, endbrace
)::acc
) xs
224 mk_braceised_aux
(BToken x
::acc
) xs
230 let rec mk_ifdef xs
=
236 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
237 Ifdef
(body, extra
)::mk_ifdef xs
238 | TIfdefBool
(b
,_
, _
) ->
239 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
241 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
242 if !Flag_parsing_c.if0_passing
243 then Ifdefbool
(b
, body, extra
)::mk_ifdef xs
244 else Ifdef
(body, extra
)::mk_ifdef xs
246 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
247 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
248 Ifdefbool
(b
, body, extra
)::mk_ifdef xs
252 (* todo? can have some Ifdef in the line ? *)
253 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
254 NotIfdefLine
line::mk_ifdef xs
257 and mk_ifdef_parameters extras acc_before_sep xs
=
260 (* Note that mk_ifdef is assuming that CPP instruction are alone
261 * on their line. Because I do a span (fun x -> is_same_line ...)
262 * I might take with me a #endif if this one is mixed on a line
263 * with some "normal" tokens.
265 pr2 "PB: not found closing ifdef in fuzzy parsing";
266 [List.rev acc_before_sep
], List.rev extras
, []
270 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
272 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
274 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
276 | TIfdefBool
(b
,_
,_
) ->
277 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
279 if !Flag_parsing_c.if0_passing
282 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
285 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
288 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
289 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
291 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
295 let body, extras
, xs
= mk_ifdef_parameters
(x
::extras
) [] xs
in
296 (List.rev acc_before_sep
)::body, extras
, xs
298 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
299 mk_ifdef_parameters extras
(NotIfdefLine
line::acc_before_sep
) xs
302 (* --------------------------------------- *)
304 let line_of_paren = function
306 | Parenthised
(xxs
, info_parens
) ->
307 (match info_parens
with
308 | [] -> raise
(Impossible
121)
313 let rec span_line_paren line = function
317 | PToken tok
when TH.is_eof tok
.tok
->
320 if line_of_paren x
=|= line
322 let (l1
, l2
) = span_line_paren line xs
in
328 let rec mk_line_parenthised xs
=
332 let line_no = line_of_paren x
in
333 let line, xs
= span_line_paren line_no xs
in
334 Line
(x
::line)::mk_line_parenthised xs
337 (* --------------------------------------- *)
338 let rec mk_body_function_grouped xs
=
343 | {tok
= TOBrace _
; col
= 0} ->
344 let is_closing_brace = function
345 | {tok
= TCBrace _
; col
= 0 } -> true
348 let body, xs
= Common.span
(fun x
-> not
(is_closing_brace x
)) xs
in
350 | ({tok
= TCBrace _
; col
= 0 })::xs
->
351 BodyFunction
body::mk_body_function_grouped xs
353 pr2 "PB:not found closing brace in fuzzy parsing";
355 | _
-> raise
(Impossible
122)
359 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
360 NotBodyLine
line::mk_body_function_grouped xs
364 (* ------------------------------------------------------------------------- *)
366 (* ------------------------------------------------------------------------- *)
368 let rec iter_token_paren f xs
=
369 xs
+> List.iter
(function
370 | PToken tok
-> f tok
;
371 | Parenthised
(xxs
, info_parens
) ->
372 info_parens
+> List.iter f
;
373 xxs
+> List.iter
(fun xs
-> iter_token_paren f xs
)
376 let rec iter_token_brace f xs
=
377 xs
+> List.iter
(function
378 | BToken tok
-> f tok
;
379 | Braceised
(xxs
, tok1
, tok2opt
) ->
380 f tok1
; do_option f tok2opt
;
381 xxs
+> List.iter
(fun xs
-> iter_token_brace f xs
)
384 let rec iter_token_ifdef f xs
=
385 xs
+> List.iter
(function
386 | NotIfdefLine xs
-> xs
+> List.iter f
;
387 | Ifdefbool
(_
, xxs
, info_ifdef
)
388 | Ifdef
(xxs
, info_ifdef
) ->
389 info_ifdef
+> List.iter f
;
390 xxs
+> List.iter
(iter_token_ifdef f
)
396 let tokens_of_paren xs
=
398 xs
+> iter_token_paren (fun tok
-> push2 tok
g);
402 let tokens_of_paren_ordered xs
=
405 let rec aux_tokens_ordered = function
406 | PToken tok
-> push2 tok
g;
407 | Parenthised
(xxs
, info_parens
) ->
408 let (opar
, cpar
, commas
) =
409 match info_parens
with
411 (match List.rev xs
with
413 opar
, cpar
, List.rev xs
414 | _
-> raise
(Impossible
123)
416 | _
-> raise
(Impossible
124)
419 aux_args
(xxs
,commas
);
422 and aux_args
(xxs
, commas
) =
423 match xxs
, commas
with
425 | [xs
], [] -> xs
+> List.iter
aux_tokens_ordered
426 | xs
::ys
::xxs
, comma
::commas
->
427 xs
+> List.iter
aux_tokens_ordered;
429 aux_args
(ys
::xxs
, commas
)
430 | _
-> raise
(Impossible
125)
434 xs
+> List.iter
aux_tokens_ordered;
439 (* ------------------------------------------------------------------------- *)
440 (* set the context info in token *)
441 (* ------------------------------------------------------------------------- *)
444 let rec set_in_function_tag xs
=
445 (* could try: ) { } but it can be the ) of a if or while, so
446 * better to base the heuristic on the position in column zero.
447 * Note that some struct or enum or init put also their { in first column
448 * but set_in_other will overwrite the previous InFunction tag.
452 (* ) { and the closing } is in column zero, then certainly a function *)
453 | BToken
({tok
= TCPar _
})::(Braceised
(body, tok1
, Some tok2
))::xs
454 when tok1
.col
<> 0 && tok2
.col
=|= 0 ->
455 body +> List.iter
(iter_token_brace (fun tok
->
456 tok
.where
<- InFunction
458 set_in_function_tag xs
460 | (BToken x
)::xs
-> set_in_function_tag xs
462 | (Braceised
(body, tok1
, Some tok2
))::xs
463 when tok1
.col
=|= 0 && tok2
.col
=|= 0 ->
464 body +> List.iter
(iter_token_brace (fun tok
->
465 tok
.where
<- InFunction
467 set_in_function_tag xs
468 | Braceised
(body, tok1
, tok2
)::xs
->
469 set_in_function_tag xs
472 let rec set_in_other xs
=
476 | BToken
({tok
= Tenum _
})::BToken
({tok
= TIdent _
})
477 ::Braceised
(body, tok1
, tok2
)::xs
478 | BToken
({tok
= Tenum _
})
479 ::Braceised
(body, tok1
, tok2
)::xs
481 body +> List.iter
(iter_token_brace (fun tok
->
487 | BToken
({tok
= Tstruct _
})::BToken
({tok
= TIdent _
})
488 ::Braceised
(body, tok1
, tok2
)::xs
->
489 body +> List.iter
(iter_token_brace (fun tok
->
490 tok
.where
<- InStruct
;
494 | BToken
({tok
= TEq _
})
495 ::Braceised
(body, tok1
, tok2
)::xs
->
496 body +> List.iter
(iter_token_brace (fun tok
->
497 tok
.where
<- InInitializer
;
501 | BToken _
::xs
-> set_in_other xs
503 | Braceised
(body, tok1
, tok2
)::xs
->
504 body +> List.iter
set_in_other;
510 let set_context_tag xs
=
512 set_in_function_tag xs
;