cefa5e9fcd9395b1d768e2764200afe1225c3e8f
[bpt/coccinelle.git] / parsing_c / token_helpers.ml
1 (* Yoann Padioleau
2 *
3 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
4 *
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License (GPL)
7 * version 2 as published by the Free Software Foundation.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * file license.txt for more details.
13 *)
14
15
16 open Common
17
18 open Parser_c
19
20 (*****************************************************************************)
21 (* Is_xxx, categories *)
22 (*****************************************************************************)
23
(* could define a type token_class = Comment | Ident | Operator | ...
 * update: now token_c can maybe do that.
 * but still, sometimes tokens belong to multiple classes. Could maybe
 * then return a set of classes.
 *)
29
(* True for the two whitespace token kinds: TCommentSpace and
 * TCommentNewline. *)
let is_space tok =
  match tok with
  | TCommentSpace _ | TCommentNewline _ -> true
  | _ -> false
34
(* Synonym for is_space. *)
let is_whitespace tok = is_space tok
36
(* True for TComment as well as for the whitespace tokens TCommentSpace
 * and TCommentNewline. *)
let is_just_comment_or_space tok =
  match tok with
  | TComment _ | TCommentSpace _ | TCommentNewline _ -> true
  | _ -> false
(* Synonym for is_just_comment_or_space (so whitespace tokens also
 * count as "real" comments here). *)
let is_real_comment tok = is_just_comment_or_space tok
43
(* True only for TComment; whitespace and cpp/misc comment tokens are
 * excluded. *)
let is_just_comment tok =
  match tok with
  | TComment _ -> true
  | _ -> false
47
48
49
50
(* True for every comment-like token: TComment, the whitespace tokens
 * (TCommentSpace, TCommentNewline) and the TCommentCpp / TCommentMisc
 * tokens. *)
let is_comment tok =
  match tok with
  | TComment _ -> true
  | TCommentSpace _ | TCommentNewline _ -> true
  | TCommentCpp _ | TCommentMisc _ -> true
  | _ -> false
57
(* coupling with comment_annotater_c.ml.
 * In fact more tokens than comments are not in the ast, but
 * they were usually temporarily created by ocamllex and removed
 * in parsing_hacks.
 *)
(* Currently the same predicate as is_comment. *)
let is_not_in_ast tok = is_comment tok
64
(* True for TCommentCpp and TCommentMisc only. *)
let is_fake_comment tok =
  match tok with
  | TCommentCpp _ -> true
  | TCommentMisc _ -> true
  | _ -> false
69
(* Negation of is_comment. *)
let is_not_comment x =
  let commentish = is_comment x in
  not commentish
72
73
74 (* ---------------------------------------------------------------------- *)
75
(* True for the preprocessor-directive tokens: include, define, undef,
 * the ifdef family (including the Bool/Misc/Version variants) and
 * TCppDirectiveOther. *)
let is_cpp_instruction tok =
  match tok with
  | TInclude _ | TDefine _ | TUndef _ -> true
  | TIfdef _ | TIfdefelse _ | TIfdefelif _ | TEndif _ -> true
  | TIfdefBool _ | TIfdefMisc _ | TIfdefVersion _ -> true
  | TCppDirectiveOther _ -> true
  | _ -> false
85
86
(* True for the Tasm, Tinline, Tattribute and Ttypeof keyword tokens. *)
let is_gcc_token tok =
  match tok with
  | Tasm _ | Tinline _ | Tattribute _ | Ttypeof _ -> true
  | _ -> false
94
95
96
97
98 (* ---------------------------------------------------------------------- *)
(* True for an opening parenthesis token, either TOPar or its
 * TOParDefine variant. *)
let is_opar tok =
  match tok with
  | TOPar _ -> true
  | TOParDefine _ -> true
  | _ -> false
102
(* True for a closing parenthesis token, either TCPar or its
 * TCParEOL variant. *)
let is_cpar tok =
  match tok with
  | TCPar _ -> true
  | TCParEOL _ -> true
  | _ -> false
106
107
(* True for an opening brace token, either TOBrace or its
 * TOBraceDefineInit variant. *)
let is_obrace tok =
  match tok with
  | TOBrace _ -> true
  | TOBraceDefineInit _ -> true
  | _ -> false
111
(* True only for the closing brace token TCBrace. *)
let is_cbrace tok =
  match tok with
  | TCBrace _ -> true
  | _ -> false
115
116
117
118
119 (* ---------------------------------------------------------------------- *)
(* True only for the EOF token. *)
let is_eof tok =
  match tok with
  | EOF _ -> true
  | _ -> false
123
124
125
(* True for the statement keyword tokens (loops, conditionals, jumps,
 * case), for the TPtVirg token and for TMacroIterator. *)
let is_statement tok =
  match tok with
  | Tfor _ | Tdo _ | Twhile _ -> true
  | Tif _ | Telse _ | Tswitch _ | Tcase _ -> true
  | Treturn _ | Tbreak _ | Tcontinue _ | Tgoto _ -> true
  | TPtVirg _ -> true
  | TMacroIterator _ -> true
  | _ -> false
134
(* is_start_of_something is used in parse_c for error recovery, to find
 * a synchronisation token.
 *
 * Would like to put TIdent or TDefine, TIfdef but they can be in the
 * middle of a function, for instance with label:.
 *
 * Could put Typedefident, but would it be fired? It would work in error
 * recovery on the already_passed tokens, which have already gone through
 * the Parsing_hacks.lookahead machinery, but it would not work on the
 * "next" tokens. Moreover, because the namespace for labels is different
 * from the namespace for ident/typedef, the name of a typedef can also be
 * used for a label, so it is dangerous to put Typedefident at true here.
 *
 * Can look in parser_c.output to know what can be at toplevel
 * at the very beginning.
 *)
151
(* True for base-type keywords, sign/storage/qualifier keywords,
 * typedef and struct/union/enum keyword tokens. *)
let is_start_of_something tok =
  match tok with
  (* base types *)
  | Tchar _ | Tshort _ | Tint _ | Tlong _ | Tfloat _ | Tdouble _ | Tvoid _
  (* sign specifiers *)
  | Tunsigned _ | Tsigned _
  (* storage classes *)
  | Tauto _ | Tregister _ | Textern _ | Tstatic _
  (* qualifiers *)
  | Tconst _ | Tvolatile _
  (* type definitions *)
  | Ttypedef _ | Tstruct _ | Tunion _ | Tenum _ -> true
  | _ -> false
161
162
163
(* True for the logical, bitwise, comparison, shift and arithmetic
 * operator tokens listed below. *)
let is_binary_operator tok =
  match tok with
  (* logical and bitwise *)
  | TOrLog _ | TAndLog _ | TOr _ | TXor _ | TAnd _ -> true
  (* comparisons *)
  | TEqEq _ | TNotEq _ | TInf _ | TSup _ | TInfEq _ | TSupEq _ -> true
  (* shifts *)
  | TShl _ | TShr _ -> true
  (* arithmetic *)
  | TPlus _ | TMinus _ | TMul _ | TDiv _ | TMod _ -> true
  | _ -> false
171
(* True for Tif, Twhile, Tswitch, Ttypeof and TMacroIterator tokens. *)
let is_stuff_taking_parenthized tok =
  match tok with
  | Tif _ | Twhile _ | Tswitch _ -> true
  | Ttypeof _ -> true
  | TMacroIterator _ -> true
  | _ -> false
180
181
182 (* used in the algorithms for "10 most problematic errors" *)
(* True for identifier tokens (plain, typedef, #define name, variadic
 * define parameter), for TUnknown, and for the TMacro* tokens listed
 * below. Note that TMacroIdentBuilder is not in the list. *)
let is_ident_like tok =
  match tok with
  | TIdent _ | TypedefIdent _ | TIdentDefine _ | TDefParamVariadic _ -> true
  | TUnknown _ -> true
  | TMacroAttr _ | TMacroAttrStorage _ -> true
  | TMacroStmt _ | TMacroString _ -> true
  | TMacroDecl _ | TMacroStructDecl _ | TMacroDeclConst _ -> true
  | TMacroIterator _ -> true
  | _ -> false
202
203
204 (*****************************************************************************)
205 (* Visitors *)
206 (*****************************************************************************)
207
(* Because ocamlyacc forces us to do it that way. The ocamlyacc token
 * can't be a pair of a sum type, it must be directly a sum type.
 *)
(* Returns the info value carried by a token.
 *
 * The match is exhaustive over the constructors handled here (there is
 * deliberately no wildcard arm). Constructors are grouped by the shape
 * of their payload, i.e. by where the info sits in it.
 *)
let info_of_tok tok =
  match tok with
  (* payload is the info alone: cpp-related tokens, comments, misc *)
  | TDefine ii | TCppDirectiveOther ii
  | TDefEOL ii | TOParDefine ii | TCppEscapedNewline ii
  | TCppConcatOp ii
  | TOBraceDefineInit ii
  | TUnknown ii
  | TMacroDeclConst ii
  | TCParEOL ii
  | TAction ii
  | TComment ii | TCommentSpace ii | TCommentNewline ii | TCommentMisc ii
  | TCommentSkipTagStart ii | TCommentSkipTagEnd ii

  (* payload is the info alone: punctuation and operators *)
  | TOPar ii | TCPar ii | TOBrace ii | TCBrace ii | TOCro ii | TCCro ii
  | TDot ii | TComma ii | TPtrOp ii
  | TInc ii | TDec ii
  | TEq ii
  | TWhy ii | TTilde ii | TBang ii
  | TEllipsis ii | TDotDot ii
  | TPtVirg ii
  | TOrLog ii | TAndLog ii | TOr ii | TXor ii | TAnd ii
  | TEqEq ii | TNotEq ii | TInf ii | TSup ii | TInfEq ii | TSupEq ii
  | TShl ii | TShr ii
  | TPlus ii | TMinus ii | TMul ii | TDiv ii | TMod ii

  (* payload is the info alone: keywords *)
  | Tchar ii | Tshort ii | Tint ii | Tdouble ii | Tfloat ii | Tlong ii
  | Tunsigned ii | Tsigned ii | Tvoid ii
  | Tauto ii | Tregister ii | Textern ii | Tstatic ii
  | Tconst ii | Tvolatile ii
  | Trestrict ii
  | Tstruct ii | Tenum ii | Ttypedef ii | Tunion ii
  | Tbreak ii | Telse ii | Tswitch ii | Tcase ii | Tcontinue ii
  | Tfor ii | Tdo ii | Tif ii | Twhile ii
  | Treturn ii | Tgoto ii | Tdefault ii
  | Tsizeof ii
  | Tasm ii | Tattribute ii | Tinline ii | Ttypeof ii

  | EOF ii
    -> ii

  (* payload is a pair whose second component is the info *)
  | TString (_, ii) | TChar (_, ii) | TFloat (_, ii)
  | TAssign (_, ii)
  | TIdent (_, ii) | TypedefIdent (_, ii)
  | TInt (_, ii)
  | TUndef (_, ii)
  | TIncludeFilename (_, ii)
  | TIdentDefine (_, ii)
  | TDefParamVariadic (_, ii)
  | TMacroIdentBuilder (_, ii)
  | TMacroAttr (_, ii) | TMacroAttrStorage (_, ii)
  | TMacroStmt (_, ii) | TMacroString (_, ii)
  | TMacroDecl (_, ii) | TMacroStructDecl (_, ii)
  | TMacroIterator (_, ii)
  (* | TMacroTop (_, ii) *)
  | TCommentCpp (_, ii)
  | TIfdef (_, ii) | TIfdefelse (_, ii) | TIfdefelif (_, ii) | TEndif (_, ii)
    -> ii

  (* payload is a triple whose last component is the info *)
  | TIfdefBool (_, _, ii) | TIfdefMisc (_, _, ii) | TIfdefVersion (_, _, ii)
    -> ii

  (* payload is a 4-tuple whose last component is the info *)
  | TInclude (_, _, _, ii) -> ii

  (* the one constructor where the info comes first *)
  | TIncludeStart (ii, _) -> ii
353
354
355
356
357 (* used by tokens to complete the parse_info with filename, line, col infos *)
(* [visitor_info_of_tok f tok] rebuilds [tok] with the same constructor
 * and the same payload, except that the info component is replaced by
 * [f] applied to it. [f] is called exactly once per token.
 *)
let visitor_info_of_tok f tok =
  match tok with
  (* literals and assignment operator *)
  | TString (lit, ii) -> TString (lit, f ii)
  | TChar (lit, ii) -> TChar (lit, f ii)
  | TFloat (lit, ii) -> TFloat (lit, f ii)
  | TAssign (op, ii) -> TAssign (op, f ii)

  | TIdent (name, ii) -> TIdent (name, f ii)
  | TypedefIdent (name, ii) -> TypedefIdent (name, f ii)
  | TInt (lit, ii) -> TInt (lit, f ii)

  (* cpp directives *)
  | TDefine ii -> TDefine (f ii)
  | TUndef (name, ii) -> TUndef (name, f ii)
  | TCppDirectiveOther ii -> TCppDirectiveOther (f ii)
  | TInclude (includes, filename, inifdef, ii) ->
      TInclude (includes, filename, inifdef, f ii)
  (* note: here the info is the first component *)
  | TIncludeStart (ii, inifdef) -> TIncludeStart (f ii, inifdef)
  | TIncludeFilename (name, ii) -> TIncludeFilename (name, f ii)

  | TCppEscapedNewline ii -> TCppEscapedNewline (f ii)
  | TDefEOL ii -> TDefEOL (f ii)
  | TCppConcatOp ii -> TCppConcatOp (f ii)
  | TOParDefine ii -> TOParDefine (f ii)
  | TIdentDefine (name, ii) -> TIdentDefine (name, f ii)
  | TDefParamVariadic (name, ii) -> TDefParamVariadic (name, f ii)
  | TOBraceDefineInit ii -> TOBraceDefineInit (f ii)

  | TUnknown ii -> TUnknown (f ii)

  (* macro tokens *)
  | TMacroIdentBuilder (name, ii) -> TMacroIdentBuilder (name, f ii)
  | TMacroAttr (name, ii) -> TMacroAttr (name, f ii)
  | TMacroAttrStorage (name, ii) -> TMacroAttrStorage (name, f ii)
  | TMacroStmt (name, ii) -> TMacroStmt (name, f ii)
  | TMacroString (name, ii) -> TMacroString (name, f ii)
  | TMacroDecl (name, ii) -> TMacroDecl (name, f ii)
  | TMacroStructDecl (name, ii) -> TMacroStructDecl (name, f ii)
  | TMacroDeclConst ii -> TMacroDeclConst (f ii)
  | TMacroIterator (name, ii) -> TMacroIterator (name, f ii)
  (* | TMacroTop (name, ii) -> TMacroTop (name, f ii) *)
  | TCParEOL ii -> TCParEOL (f ii)

  | TAction ii -> TAction (f ii)

  (* comments *)
  | TComment ii -> TComment (f ii)
  | TCommentSpace ii -> TCommentSpace (f ii)
  | TCommentNewline ii -> TCommentNewline (f ii)
  | TCommentCpp (kind, ii) -> TCommentCpp (kind, f ii)
  | TCommentMisc ii -> TCommentMisc (f ii)
  | TCommentSkipTagStart ii -> TCommentSkipTagStart (f ii)
  | TCommentSkipTagEnd ii -> TCommentSkipTagEnd (f ii)

  (* ifdef family *)
  | TIfdef (tag, ii) -> TIfdef (tag, f ii)
  | TIfdefelse (tag, ii) -> TIfdefelse (tag, f ii)
  | TIfdefelif (tag, ii) -> TIfdefelif (tag, f ii)
  | TEndif (tag, ii) -> TEndif (tag, f ii)
  | TIfdefBool (b, tag, ii) -> TIfdefBool (b, tag, f ii)
  | TIfdefMisc (b, tag, ii) -> TIfdefMisc (b, tag, f ii)
  | TIfdefVersion (b, tag, ii) -> TIfdefVersion (b, tag, f ii)

  (* punctuation and operators *)
  | TOPar ii -> TOPar (f ii)
  | TCPar ii -> TCPar (f ii)
  | TOBrace ii -> TOBrace (f ii)
  | TCBrace ii -> TCBrace (f ii)
  | TOCro ii -> TOCro (f ii)
  | TCCro ii -> TCCro (f ii)
  | TDot ii -> TDot (f ii)
  | TComma ii -> TComma (f ii)
  | TPtrOp ii -> TPtrOp (f ii)
  | TInc ii -> TInc (f ii)
  | TDec ii -> TDec (f ii)
  | TEq ii -> TEq (f ii)
  | TWhy ii -> TWhy (f ii)
  | TTilde ii -> TTilde (f ii)
  | TBang ii -> TBang (f ii)
  | TEllipsis ii -> TEllipsis (f ii)
  | TDotDot ii -> TDotDot (f ii)
  | TPtVirg ii -> TPtVirg (f ii)
  | TOrLog ii -> TOrLog (f ii)
  | TAndLog ii -> TAndLog (f ii)
  | TOr ii -> TOr (f ii)
  | TXor ii -> TXor (f ii)
  | TAnd ii -> TAnd (f ii)
  | TEqEq ii -> TEqEq (f ii)
  | TNotEq ii -> TNotEq (f ii)
  | TInf ii -> TInf (f ii)
  | TSup ii -> TSup (f ii)
  | TInfEq ii -> TInfEq (f ii)
  | TSupEq ii -> TSupEq (f ii)
  | TShl ii -> TShl (f ii)
  | TShr ii -> TShr (f ii)
  | TPlus ii -> TPlus (f ii)
  | TMinus ii -> TMinus (f ii)
  | TMul ii -> TMul (f ii)
  | TDiv ii -> TDiv (f ii)
  | TMod ii -> TMod (f ii)

  (* keywords *)
  | Tchar ii -> Tchar (f ii)
  | Tshort ii -> Tshort (f ii)
  | Tint ii -> Tint (f ii)
  | Tdouble ii -> Tdouble (f ii)
  | Tfloat ii -> Tfloat (f ii)
  | Tlong ii -> Tlong (f ii)
  | Tunsigned ii -> Tunsigned (f ii)
  | Tsigned ii -> Tsigned (f ii)
  | Tvoid ii -> Tvoid (f ii)
  | Tauto ii -> Tauto (f ii)
  | Tregister ii -> Tregister (f ii)
  | Textern ii -> Textern (f ii)
  | Tstatic ii -> Tstatic (f ii)
  | Tconst ii -> Tconst (f ii)
  | Tvolatile ii -> Tvolatile (f ii)

  | Trestrict ii -> Trestrict (f ii)

  | Tstruct ii -> Tstruct (f ii)
  | Tenum ii -> Tenum (f ii)
  | Ttypedef ii -> Ttypedef (f ii)
  | Tunion ii -> Tunion (f ii)
  | Tbreak ii -> Tbreak (f ii)
  | Telse ii -> Telse (f ii)
  | Tswitch ii -> Tswitch (f ii)
  | Tcase ii -> Tcase (f ii)
  | Tcontinue ii -> Tcontinue (f ii)
  | Tfor ii -> Tfor (f ii)
  | Tdo ii -> Tdo (f ii)
  | Tif ii -> Tif (f ii)
  | Twhile ii -> Twhile (f ii)
  | Treturn ii -> Treturn (f ii)
  | Tgoto ii -> Tgoto (f ii)
  | Tdefault ii -> Tdefault (f ii)
  | Tsizeof ii -> Tsizeof (f ii)
  | Tasm ii -> Tasm (f ii)
  | Tattribute ii -> Tattribute (f ii)
  | Tinline ii -> Tinline (f ii)
  | Ttypeof ii -> Ttypeof (f ii)

  | EOF ii -> EOF (f ii)
502
503
504 (*****************************************************************************)
505 (* Accessors *)
506 (*****************************************************************************)
507
(* Returns the pair (line, column) obtained by applying
 * Ast_c.line_of_info and Ast_c.col_of_info to the token's info. *)
let linecol_of_tok tok =
  let ii = info_of_tok tok in
  let line = Ast_c.line_of_info ii in
  let col = Ast_c.col_of_info ii in
  (line, col)
511
(* Second component of linecol_of_tok. *)
let col_of_tok x =
  let (_, col) = linecol_of_tok x in
  col
(* First component of linecol_of_tok. *)
let line_of_tok x =
  let (line, _) = linecol_of_tok x in
  line
(* Ast_c.opos_of_info applied to the token's info. *)
let pos_of_tok x =
  let ii = info_of_tok x in
  Ast_c.opos_of_info ii
(* Ast_c.str_of_info applied to the token's info. *)
let str_of_tok x =
  let ii = info_of_tok x in
  Ast_c.str_of_info ii
(* Ast_c.file_of_info applied to the token's info. *)
let file_of_tok x =
  let ii = info_of_tok x in
  Ast_c.file_of_info ii
(* Ast_c.pinfo_of_info applied to the token's info. *)
let pinfo_of_tok x =
  let ii = info_of_tok x in
  Ast_c.pinfo_of_info ii
518
(* True when the token's pinfo is an Ast_c.OriginTok. *)
let is_origin x =
  let pinfo = pinfo_of_tok x in
  match pinfo with
  | Ast_c.OriginTok _ -> true
  | _ -> false
(* True when the token's pinfo is an Ast_c.ExpandedTok. *)
let is_expanded x =
  let pinfo = pinfo_of_tok x in
  match pinfo with
  | Ast_c.ExpandedTok _ -> true
  | _ -> false
(* True when the token's pinfo is an Ast_c.FakeTok. *)
let is_fake x =
  let pinfo = pinfo_of_tok x in
  match pinfo with
  | Ast_c.FakeTok _ -> true
  | _ -> false
(* True when the token's pinfo is an Ast_c.AbstractLineTok. *)
let is_abstract x =
  let pinfo = pinfo_of_tok x in
  match pinfo with
  | Ast_c.AbstractLineTok _ -> true
  | _ -> false
527
528 (*****************************************************************************)
529 (* Helpers *)
530 (*****************************************************************************)
(* True when the token's line is [line], [line - 1] or [line - 2],
 * i.e. the token sits on the given line or on one of the two lines
 * just before it. Lines are compared with Common's =|= operator. *)
let is_same_line_or_close line tok =
  let tok_line = line_of_tok tok in
  tok_line =|= line
  || tok_line =|= line - 1
  || tok_line =|= line - 2
535