3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
18 module TH
= Token_helpers
22 (*****************************************************************************)
23 (* Some debugging functions *)
24 (*****************************************************************************)
(* Printing helpers used by the diagnostics in this file: [pr2] and
 * [pr2_once] are the pair returned by [Common.mk_pr2_wrappers] for the
 * [Flag_parsing_c.verbose_parsing] flag. *)
let (pr2, pr2_once) =
  Common.mk_pr2_wrappers Flag_parsing_c.verbose_parsing
28 (* ------------------------------------------------------------------------- *)
29 (* fuzzy parsing, different "views" over the same program *)
30 (* ------------------------------------------------------------------------- *)
33 (* Normally I should not use ref/mutable in the token_extended type
34 * and I should have a set of functions taking a list of tokens and
35 * returning a list of tokens. The problem is that to make easier some
36 * functions, it is better to work on better representation, on "views"
37 * over this list of tokens. But then modifying those views and get
38 * back from those views to the original simple list of tokens is
39 * tedious. One way is to maintain next to the view a list of "actions"
40 * (I was using a hash storing the charpos of the token and associating
41 * the action) but it is tedious too. Simpler to use mutable/ref. We
42 * use the same idea that we use when working on the Ast_c. *)
(* old: when I was using the list of "actions" next to the views, with the
 * hash indexed by the charpos, there could have been some problems:
 * how does my fake_pos interact with the way I tag and adjust tokens?
 * Because I based my tagging on the position of the token, I could
 * sometimes tag another fakeInfo that should not be tagged.
 * Fortunately I no longer use this technique.
 *)
52 (* update: quite close to the Place_c.Inxxx *)
54 InFunction
| InEnum
| InStruct
| InInitializer
| NoContext
56 type token_extended
= {
57 mutable tok
: Parser_c.token
;
58 mutable where
: context
;
60 (* less: need also a after ? *)
61 mutable new_tokens_before
: Parser_c.token list
;
63 (* line x col cache, more easily accessible, of the info in the token *)
(* todo? is it ok to reset a TDefEOL as a comment? If we do that, it
 * can confuse the parser.
 *)
(* [set_as_comment cppkind x] overwrites [x.tok] in place with
 * [TCommentCpp (cppkind, TH.info_of_tok x.tok)], so the new token is built
 * from the info of the token it replaces. One branch does nothing at all
 * (see the EOF remark on that branch).
 * NOTE(review): the condition guarding the do-nothing branch is not
 * visible in this listing -- presumably an EOF test on [x.tok]; confirm. *)
71 let set_as_comment cppkind x
=
73 then () (* otherwise parse_c will be lost if don't find a EOF token *)
75 x
.tok
<- TCommentCpp
(cppkind
, TH.info_of_tok x
.tok
)
(* [mk_token_extended x] builds the extended record for the parser token
 * [x]: the [line] and [col] fields are filled from [TH.linecol_of_tok x]
 * and [new_tokens_before] starts as the empty list.
 * NOTE(review): the initialisation of the other record fields is not
 * visible in this listing -- confirm against the full file. *)
77 let mk_token_extended x
=
78 let (line
, col
) = TH.linecol_of_tok x
in
80 line
= line
; col
= col
;
82 new_tokens_before
= [];
(* [rebuild_tokens_extented toks_ext] flattens a list of extended tokens
 * back into plain tokens and re-wraps them.
 * For each extended token, every element of its [new_tokens_before] list
 * is pushed onto the local accumulator [_tokens] with [push2]. The
 * accumulator is then reversed and each token is turned into a fresh
 * extended token with [mk_token_extended] (through [acc_map]).
 * NOTE(review): the step that pushes the token itself is not visible in
 * this listing -- presumably it follows the [new_tokens_before] loop;
 * confirm. *)
86 let rebuild_tokens_extented toks_ext
=
87 let _tokens = ref [] in
88 toks_ext
+> List.iter
(fun tok
->
89 tok
.new_tokens_before
+> List.iter
(fun x
-> push2 x
_tokens);
92 let tokens = List.rev
!_tokens in
93 (tokens +> acc_map
mk_token_extended)
97 (* x list list, because x list separated by ',' *)
99 | Parenthised
of paren_grouped list list
* token_extended list
100 | PToken
of token_extended
104 brace_grouped list list
* token_extended
* token_extended
option
105 | BToken
of token_extended
(* Far better data structure than doing hacks in the lexer or parser,
 * because in the lexer we don't know to which ifdef an endif is related,
 * and so when we want to comment out an ifdef, we don't know which endif
 * we must also comment out. Especially true for the #if 0, which
 * sometimes ...
 *
 * x list list, because x list separated by #else or #elif
 *)
116 | Ifdef
of ifdef_grouped list list
* token_extended list
117 | Ifdefbool
of bool * ifdef_grouped list list
* token_extended list
118 | NotIfdefLine
of token_extended list
121 type 'a line_grouped
=
(* View that separates function bodies from the rest of the token stream.
 * Both constructors carry a list of extended tokens: [BodyFunction] for
 * the tokens of a body, [NotBodyLine] for a run of other tokens. *)
type body_function_grouped =
  | BodyFunction of token_extended list
  | NotBodyLine of token_extended list
130 (* ------------------------------------------------------------------------- *)
132 (* ------------------------------------------------------------------------- *)
(* todo: synchro! Make more use of indentation:
 * if a paren is not closed and we are back at the same indentation level,
 * it is certainly because it is part of a mid-ifdef-expression.
 *)
(* [mk_parenthised xs] groups a flat list of extended tokens into the
 * paren-grouped view. The inner [loop] accumulates the result in reverse
 * order in [acc].
 *  - On an opening parenthesis ([TOPar] or [TOParDefine]) the arguments,
 *    the delimiting tokens and the remaining input are obtained from
 *    [mk_parameters [x] [] xs] and stored as [Parenthised (body, extras)].
 *  - In the other visible branch the token is kept as [PToken x].
 * NOTE(review): the match heads, the empty-list clause and the initial
 * call to [loop] are not visible in this listing. *)
138 let rec mk_parenthised xs
=
139 let rec loop acc
= function
143 | TOPar _
| TOParDefine _
->
144 let body, extras
, xs
= mk_parameters
[x
] [] xs
in
145 loop (Parenthised
(body,extras
)::acc
) xs
147 loop (PToken x
::acc
) xs
151 (* return the body of the parenthised expression and the rest of the tokens *)
(* [mk_parameters extras acc_before_sep xs] reads tokens from [xs] up to
 * the closing parenthesis of the current group and returns a triple:
 * (argument groups, delimiting tokens, remaining tokens).
 *  - [extras] holds the delimiting tokens collected so far, in reverse;
 *    the terminating clauses reverse it before returning.
 *  - [acc_before_sep] holds the elements of the current argument, in
 *    reverse.
 *  - [TCPar] / [TCParEOL]: the closing token is added to [extras] and the
 *    current argument is returned as the only group.
 *  - [TOPar] / [TOParDefine]: a nested group is parsed recursively and
 *    stored as [Parenthised] in the current argument.
 *  - One clause conses [x] onto [extras], starts a fresh argument and
 *    prepends the finished argument to the groups of the recursive call
 *    (presumably the separator case; its pattern is not visible here).
 *  - An opening brace ([TOBrace]) in column 0 is taken as a resync point:
 *    a problem is reported with [pr2] and the brace is left in the
 *    remaining tokens. The logged text mentions a closing brace although
 *    the pattern is [TOBrace].
 *  - When the input ends first, a problem is reported with [pr2] and the
 *    partial result is returned with no remaining tokens.
 *  - Any other token is added to the current argument as [PToken]. *)
152 and mk_parameters extras acc_before_sep xs
=
155 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
156 pr2 "PB: not found closing paren in fuzzy parsing";
157 [List.rev acc_before_sep
], List.rev extras
, []
161 | TOBrace _
when x
.col
=|= 0 ->
162 pr2 "PB: found synchro point } in paren";
163 [List.rev acc_before_sep
], List.rev
(extras
), (x
::xs
)
165 | TCPar _
| TCParEOL _
->
166 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
167 | TOPar _
| TOParDefine _
->
168 let body, extrasnest
, xs
= mk_parameters
[x
] [] xs
in
170 (Parenthised
(body,extrasnest
)::acc_before_sep
)
173 let body, extras
, xs
= mk_parameters
(x
::extras
) [] xs
in
174 (List.rev acc_before_sep
)::body, extras
, xs
176 mk_parameters extras
(PToken x
::acc_before_sep
) xs
(* [mk_braceised xs] groups a flat list of extended tokens into the
 * brace-grouped view. The inner [loop] accumulates the result in reverse
 * order in [acc]. Three branches are visible:
 *  - one obtains a body, an optional closing brace and the remaining
 *    input from [mk_braceised_aux [] xs] and records
 *    [Braceised (body, x, endbrace)];
 *  - one reports a closing brace found alone through [pr2] and keeps the
 *    token as a [BToken];
 *  - the last keeps the token as a [BToken] without any report.
 * NOTE(review): the patterns selecting these branches and the initial
 * call to [loop] are not visible in this listing. *)
182 let rec mk_braceised xs
=
183 let rec loop acc
= function
188 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
189 loop (Braceised
(body, x
, endbrace
)::acc
) xs
191 pr2 "PB: found closing brace alone in fuzzy parsing";
192 loop (BToken x
::acc
) xs
194 loop (BToken x
::acc
) xs
) in
(* return the body of the braceised block, its closing brace if any, and the rest of the tokens *)
(* [mk_braceised_aux acc xs] reads tokens from [xs] up to the closing brace
 * of the current group and returns a triple:
 * (body, closing brace option, remaining tokens).
 *  - [acc] holds the elements of the body collected so far, in reverse;
 *    it is reversed and returned as the single element of the body.
 *  - [TCBrace]: returns the body with [Some x] and the rest of the input.
 *  - When the input ends first, a problem is reported with [pr2] and the
 *    closing brace is [None], with no remaining tokens.
 *  - One branch parses a nested group recursively, starting from an empty
 *    accumulator, and stores it as [Braceised] in [acc].
 *  - Any other token is added to [acc] as [BToken].
 * NOTE(review): the match head and the pattern of the nested-group clause
 * are not visible in this listing. *)
198 and mk_braceised_aux acc xs
=
201 (* maybe because of #ifdef which "opens" '(' in 2 branches *)
202 pr2 "PB: not found closing brace in fuzzy parsing";
203 [List.rev acc
], None
, []
206 | TCBrace _
-> [List.rev acc
], Some x
, xs
208 let body, endbrace
, xs
= mk_braceised_aux
[] xs
in
209 mk_braceised_aux
(Braceised
(body,x
, endbrace
)::acc
) xs
211 mk_braceised_aux
(BToken x
::acc
) xs
(* [mk_ifdef xs] groups a list of extended tokens into the ifdef view.
 * For each directive token visible here, the branches, the delimiting
 * tokens and the remaining input come from [mk_ifdef_parameters [x] [] xs]:
 *  - [TIfdefBool (b, _, _)] yields [Ifdefbool (b, body, extra)] when
 *    [Flag_parsing_c.if0_passing] is set, and a plain [Ifdef] otherwise;
 *  - [TIfdefMisc] and [TIfdefVersion] always yield [Ifdefbool];
 *  - one clause whose pattern is not visible yields [Ifdef].
 * In the remaining branch, [x] and the following tokens that share its
 * [line] field (taken with [Common.span]) become one [NotIfdefLine].
 * Each result is consed onto the recursive call on the rest of the input.
 * NOTE(review): the match head and the empty-list clause are not visible
 * in this listing. *)
217 let rec mk_ifdef xs
=
223 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
224 Ifdef
(body, extra
)::mk_ifdef xs
225 | TIfdefBool
(b
,_
, _
) ->
226 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
228 (* if not passing, then consider a #if 0 as an ordinary #ifdef *)
229 if !Flag_parsing_c.if0_passing
230 then Ifdefbool
(b
, body, extra
)::mk_ifdef xs
231 else Ifdef
(body, extra
)::mk_ifdef xs
233 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
234 let body, extra
, xs
= mk_ifdef_parameters
[x
] [] xs
in
235 Ifdefbool
(b
, body, extra
)::mk_ifdef xs
239 (* todo? can have some Ifdef in the line ? *)
240 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
241 NotIfdefLine
line::mk_ifdef xs
(* [mk_ifdef_parameters extras acc_before_sep xs] reads tokens from [xs] up
 * to the directive that closes the current ifdef and returns a triple:
 * (branches, delimiting tokens, remaining tokens).
 *  - [extras] holds the delimiting directive tokens seen so far, in
 *    reverse; the terminating clauses reverse it before returning.
 *  - [acc_before_sep] holds the elements of the current branch, in
 *    reverse.
 *  - Nested directives ([TIfdefBool], [TIfdefMisc], [TIfdefVersion] and
 *    one clause whose pattern is not visible) are parsed recursively from
 *    a fresh state and stored in the current branch as [Ifdef] or
 *    [Ifdefbool]. For [TIfdefBool] the choice depends on
 *    [Flag_parsing_c.if0_passing].
 *  - One clause conses [x] onto [extras], starts a fresh branch and
 *    prepends the finished branch to the result of the recursive call
 *    (presumably the else/elif case; its pattern is not visible here).
 *  - One clause adds [x] to [extras] and returns the current branch as
 *    the only one (presumably the closing directive; confirm).
 *  - Any other token is grouped with the tokens sharing its [line] field
 *    into a [NotIfdefLine].
 *  - When the input ends first, a problem is reported with [pr2] and the
 *    partial result is returned with no remaining tokens. *)
244 and mk_ifdef_parameters extras acc_before_sep xs
=
247 (* Note that mk_ifdef is assuming that CPP instruction are alone
248 * on their line. Because I do a span (fun x -> is_same_line ...)
249 * I might take with me a #endif if this one is mixed on a line
250 * with some "normal" tokens.
252 pr2 "PB: not found closing ifdef in fuzzy parsing";
253 [List.rev acc_before_sep
], List.rev extras
, []
257 [List.rev acc_before_sep
], List.rev
(x
::extras
), xs
259 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
261 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
263 | TIfdefBool
(b
,_
,_
) ->
264 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
266 if !Flag_parsing_c.if0_passing
269 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
272 extras
(Ifdef
(body, extrasnest
)::acc_before_sep
) xs
275 | TIfdefMisc
(b
,_
,_
) | TIfdefVersion
(b
,_
,_
) ->
276 let body, extrasnest
, xs
= mk_ifdef_parameters
[x
] [] xs
in
278 extras
(Ifdefbool
(b
, body, extrasnest
)::acc_before_sep
) xs
282 let body, extras
, xs
= mk_ifdef_parameters
(x
::extras
) [] xs
in
283 (List.rev acc_before_sep
)::body, extras
, xs
285 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
286 mk_ifdef_parameters extras
(NotIfdefLine
line::acc_before_sep
) xs
289 (* --------------------------------------- *)
(* [line_of_paren] returns the line associated with a paren-grouped
 * element; [span_line_paren] compares the result with [=|=].
 * For a [Parenthised] element the answer is derived from [info_parens];
 * an empty [info_parens] raises [Impossible].
 * NOTE(review): the clause for single tokens and the non-empty
 * [info_parens] case are not visible in this listing. *)
291 let line_of_paren = function
293 | Parenthised
(xxs
, info_parens
) ->
294 (match info_parens
with
295 | [] -> raise Impossible
(* [span_line_paren line xs] returns a pair of lists: the recursive call
 * binds it as [(l1, l2)] and [mk_line_parenthised] binds it as the
 * elements of the current line and the remaining elements.
 * Elements are tested with [line_of_paren x =|= line]; a [PToken] whose
 * token satisfies [TH.is_eof] has a clause of its own.
 * NOTE(review): the result of each branch is not visible in this
 * listing -- presumably the split happens at the first element on
 * another line; confirm. *)
300 let rec span_line_paren line = function
304 | PToken tok
when TH.is_eof tok
.tok
->
307 if line_of_paren x
=|= line
309 let (l1
, l2
) = span_line_paren line xs
in
(* [mk_line_parenthised xs] cuts a paren-grouped list into [Line] values.
 * The line number of the head element [x] is computed with
 * [line_of_paren]; [span_line_paren] splits the tail for that line number;
 * [x] followed by the first part forms one [Line], and the second part is
 * processed recursively.
 * NOTE(review): the match head and the empty-list clause are not visible
 * in this listing. *)
315 let rec mk_line_parenthised xs
=
319 let line_no = line_of_paren x
in
320 let line, xs
= span_line_paren line_no xs
in
321 Line
(x
::line)::mk_line_parenthised xs
324 (* --------------------------------------- *)
(* [mk_body_function_grouped xs] splits a list of extended tokens into
 * function bodies and ordinary lines.
 *  - An opening brace ([TOBrace]) in column 0 starts a candidate body:
 *    tokens are taken with [Common.span] up to the first closing brace
 *    ([TCBrace]) in column 0. When that brace is found, the tokens before
 *    it become one [BodyFunction] (the closing brace is not part of it)
 *    and processing resumes after the brace.
 *  - If no such closing brace is found, a problem is reported with [pr2].
 *  - Otherwise [x] and the following tokens that share its [line] field
 *    become one [NotBodyLine].
 * NOTE(review): the match heads, the empty-list clause and the value
 * returned after the [pr2] report are not visible in this listing. *)
325 let rec mk_body_function_grouped xs
=
330 | {tok
= TOBrace _
; col
= 0} ->
331 let is_closing_brace = function
332 | {tok
= TCBrace _
; col
= 0 } -> true
335 let body, xs
= Common.span
(fun x
-> not
(is_closing_brace x
)) xs
in
337 | ({tok
= TCBrace _
; col
= 0 })::xs
->
338 BodyFunction
body::mk_body_function_grouped xs
340 pr2 "PB:not found closing brace in fuzzy parsing";
342 | _
-> raise Impossible
346 let line, xs
= Common.span
(fun y
-> y
.line =|= x
.line) (x
::xs
) in
347 NotBodyLine
line::mk_body_function_grouped xs
351 (* ------------------------------------------------------------------------- *)
353 (* ------------------------------------------------------------------------- *)
(* [iter_token_paren f xs] applies [f] to every extended token reachable
 * from the paren-grouped list [xs].
 *  - [PToken tok]: [f] is applied to [tok].
 *  - [Parenthised (xxs, info_parens)]: [f] is first applied to each token
 *    of [info_parens] (the tokens stored next to the group), then the
 *    traversal recurses into every sub-list of [xxs].
 * The result is unit; only the effects of [f] matter.
 *
 * The parenthesis opened by [List.iter (function ...] is closed here; it
 * was left open in the damaged copy of this definition. *)
let rec iter_token_paren f xs =
  xs +> List.iter (function
    | PToken tok -> f tok
    | Parenthised (xxs, info_parens) ->
        info_parens +> List.iter f;
        xxs +> List.iter (fun xs -> iter_token_paren f xs)
  )
(* [iter_token_brace f xs] applies [f] to every extended token reachable
 * from the brace-grouped list [xs].
 *  - [BToken tok]: [f] is applied to [tok].
 *  - [Braceised (xxs, tok1, tok2opt)]: [f] is applied to [tok1], then to
 *    the token in [tok2opt] through [do_option], and finally the
 *    traversal recurses into every sub-list of [xxs].
 * The result is unit; only the effects of [f] matter.
 *
 * The parenthesis opened by [List.iter (function ...] is closed here; it
 * was left open in the damaged copy of this definition. *)
let rec iter_token_brace f xs =
  xs +> List.iter (function
    | BToken tok -> f tok
    | Braceised (xxs, tok1, tok2opt) ->
        f tok1;
        do_option f tok2opt;
        xxs +> List.iter (fun xs -> iter_token_brace f xs)
  )
(* [iter_token_ifdef f xs] applies [f] to every extended token reachable
 * from the ifdef-grouped list [xs].
 *  - [NotIfdefLine xs]: [f] is applied to each token of the line.
 *  - [Ifdefbool (_, xxs, info_ifdef)] and [Ifdef (xxs, info_ifdef)]: [f]
 *    is first applied to each token of [info_ifdef], then the traversal
 *    recurses into every sub-list of [xxs]. The boolean of [Ifdefbool] is
 *    ignored.
 * The result is unit; only the effects of [f] matter.
 *
 * The parenthesis opened by [List.iter (function ...] is closed here; it
 * was left open in the damaged copy of this definition. *)
let rec iter_token_ifdef f xs =
  xs +> List.iter (function
    | NotIfdefLine xs -> xs +> List.iter f
    | Ifdefbool (_, xxs, info_ifdef)
    | Ifdef (xxs, info_ifdef) ->
        info_ifdef +> List.iter f;
        xxs +> List.iter (iter_token_ifdef f)
  )
(* [tokens_of_paren xs] visits every extended token of the paren-grouped
 * list [xs] with [iter_token_paren] and pushes each one onto [g] with
 * [push2].
 * NOTE(review): the definition of [g] and the value finally returned are
 * not visible in this listing -- presumably a local accumulator that is
 * reversed at the end; confirm. *)
383 let tokens_of_paren xs
=
385 xs
+> iter_token_paren (fun tok
-> push2 tok
g);
(* [tokens_of_paren_ordered xs] walks the paren-grouped list [xs] and
 * pushes its tokens onto [g] with [push2].
 *  - [aux_tokens_ordered] pushes a [PToken] directly. For a [Parenthised]
 *    group it splits [info_parens] into an opening token, a closing token
 *    and the separators in between ([List.rev] is used to reach the last
 *    element), then hands the argument lists and separators to [aux_args].
 *    Any other shape of [info_parens] raises [Impossible].
 *  - [aux_args (xxs, commas)] walks the argument lists and separators in
 *    step: one argument list with no separator left is simply traversed;
 *    with at least two argument lists and one separator, the first list
 *    is traversed and the walk continues on the rest; any other
 *    combination raises [Impossible].
 * NOTE(review): the definition of [g], the pushes of the opening token,
 * closing token and separators, and the returned value are not visible
 * in this listing. *)
389 let tokens_of_paren_ordered xs
=
392 let rec aux_tokens_ordered = function
393 | PToken tok
-> push2 tok
g;
394 | Parenthised
(xxs
, info_parens
) ->
395 let (opar
, cpar
, commas
) =
396 match info_parens
with
398 (match List.rev xs
with
400 opar
, cpar
, List.rev xs
401 | _
-> raise Impossible
403 | _
-> raise Impossible
406 aux_args
(xxs
,commas
);
409 and aux_args
(xxs
, commas
) =
410 match xxs
, commas
with
412 | [xs
], [] -> xs
+> List.iter
aux_tokens_ordered
413 | xs
::ys
::xxs
, comma
::commas
->
414 xs
+> List.iter
aux_tokens_ordered;
416 aux_args
(ys
::xxs
, commas
)
417 | _
-> raise Impossible
421 xs
+> List.iter
aux_tokens_ordered;
426 (* ------------------------------------------------------------------------- *)
427 (* set the context info in token *)
428 (* ------------------------------------------------------------------------- *)
(* [set_in_function_tag xs] walks the brace-grouped list [xs] and sets the
 * [where] field to [InFunction] on every token inside a brace group that
 * looks like a function body. Two shapes are recognised:
 *  - a [TCPar] token followed by a brace group whose opening brace is not
 *    in column 0 and whose closing brace is in column 0;
 *  - a brace group whose opening and closing braces are both in column 0.
 * Plain [BToken]s and all other brace groups are skipped; the bodies of
 * skipped groups are not visited.
 * NOTE(review): the match head and the empty-list clause are not visible
 * in this listing. *)
431 let rec set_in_function_tag xs
=
432 (* could try: ) { } but it can be the ) of a if or while, so
433 * better to base the heuristic on the position in column zero.
434 * Note that some struct or enum or init put also their { in first column
435 * but set_in_other will overwrite the previous InFunction tag.
439 (* ) { and the closing } is in column zero, then certainly a function *)
440 | BToken
({tok
= TCPar _
})::(Braceised
(body, tok1
, Some tok2
))::xs
441 when tok1
.col
<> 0 && tok2
.col
=|= 0 ->
442 body +> List.iter
(iter_token_brace (fun tok
->
443 tok
.where
<- InFunction
445 set_in_function_tag xs
447 | (BToken x
)::xs
-> set_in_function_tag xs
449 | (Braceised
(body, tok1
, Some tok2
))::xs
450 when tok1
.col
=|= 0 && tok2
.col
=|= 0 ->
451 body +> List.iter
(iter_token_brace (fun tok
->
452 tok
.where
<- InFunction
454 set_in_function_tag xs
455 | Braceised
(body, tok1
, tok2
)::xs
->
456 set_in_function_tag xs
(* [set_in_other xs] refines the [where] field of the tokens inside brace
 * groups, based on the tokens that come just before the group:
 *  - [Tenum], optionally followed by a [TIdent]: the tokens of the body
 *    are visited (the tag assigned there is not visible in this listing,
 *    presumably [InEnum]; confirm);
 *  - [Tstruct] followed by a [TIdent]: the tokens are tagged [InStruct];
 *  - [TEq]: the tokens are tagged [InInitializer].
 * Any other [BToken] is skipped. For any other brace group the function
 * is applied to each sub-list of the body.
 * NOTE(review): the match head, the empty-list clause and the step that
 * follows each tagging loop are not visible in this listing. *)
459 let rec set_in_other xs
=
463 | BToken
({tok
= Tenum _
})::BToken
({tok
= TIdent _
})
464 ::Braceised
(body, tok1
, tok2
)::xs
465 | BToken
({tok
= Tenum _
})
466 ::Braceised
(body, tok1
, tok2
)::xs
468 body +> List.iter
(iter_token_brace (fun tok
->
474 | BToken
({tok
= Tstruct _
})::BToken
({tok
= TIdent _
})
475 ::Braceised
(body, tok1
, tok2
)::xs
->
476 body +> List.iter
(iter_token_brace (fun tok
->
477 tok
.where
<- InStruct
;
481 | BToken
({tok
= TEq _
})
482 ::Braceised
(body, tok1
, tok2
)::xs
->
483 body +> List.iter
(iter_token_brace (fun tok
->
484 tok
.where
<- InInitializer
;
488 | BToken _
::xs
-> set_in_other xs
490 | Braceised
(body, tok1
, tok2
)::xs
->
491 body +> List.iter
set_in_other;
497 let set_context_tag xs
=
499 set_in_function_tag xs
;