permit multiline comments and strings in macros
[bpt/coccinelle.git] / parsing_c / token_helpers.ml
CommitLineData
0708f913 1(* Yoann Padioleau
ae4735db
C
2 *
3 * Copyright (C) 2010, University of Copenhagen DIKU and INRIA.
0708f913
C
4 * Copyright (C) 2007, 2008 Ecole des Mines de Nantes
5 *
6 * This program is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU General Public License (GPL)
8 * version 2 as published by the Free Software Foundation.
ae4735db 9 *
0708f913
C
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * file license.txt for more details.
14 *)
15
16
34e49164
C
17open Common
18
19open Parser_c
20
21(*****************************************************************************)
22(* Is_xxx, categories *)
23(*****************************************************************************)
24
(* could define a type token_class = Comment | Ident | Operator | ...
 * update: now token_c can maybe do that.
 * but still, sometimes tokens belong to multiple classes. Could maybe
 * then return a set of classes.
 *)
30
34e49164
C
(* True for pure layout tokens: horizontal space or a newline. *)
let is_space tok =
  match tok with
  | TCommentSpace _ | TCommentNewline _ -> true
  | _ -> false
35
485bce71
C
(* Alias of is_space. *)
let is_whitespace tok = is_space tok
37
(* True for a TComment token or for a layout token (space or newline). *)
let is_just_comment_or_space tok =
  match tok with
  | TComment _ | TCommentSpace _ | TCommentNewline _ -> true
  | _ -> false
(* Alias of is_just_comment_or_space. *)
let is_real_comment tok = is_just_comment_or_space tok
34e49164
C
44
(* True only for a TComment token; layout tokens are excluded. *)
let is_just_comment tok =
  match tok with
  | TComment _ -> true
  | _ -> false
48
0708f913
C
49
50
51
(* True for every comment-category token: TComment, the layout tokens,
 * and the TCommentCpp / TCommentMisc tokens.
 *)
let is_comment tok =
  match tok with
  | TComment _ | TCommentSpace _ | TCommentNewline _ -> true
  | TCommentCpp _ | TCommentMisc _ -> true
  | _ -> false
58
0708f913
C
(* Coupling with comment_annotater_c.ml.
 * In fact more tokens than just the comments are absent from the ast,
 * but those were usually created temporarily by ocamllex and removed
 * again in parsing_hacks.
 *)
let is_not_in_ast tok = is_comment tok
34e49164
C
65
(* True for the TCommentCpp and TCommentMisc tokens only. *)
let is_fake_comment tok =
  match tok with
  | TCommentCpp _ -> true
  | TCommentMisc _ -> true
  | _ -> false
70
(* Negation of is_comment. *)
let is_not_comment tok =
  if is_comment tok then false else true
73
74
0708f913 75(* ---------------------------------------------------------------------- *)
485bce71 76
(* True for the cpp directive tokens listed below: include, define, undef,
 * the ifdef family, and the catch-all TCppDirectiveOther.
 *)
let is_cpp_instruction tok =
  match tok with
  | TInclude _ | TDefine _ | TUndef _ -> true
  | TIfdef _ | TIfdefelse _ | TIfdefelif _ | TEndif _ -> true
  | TIfdefBool _ | TIfdefMisc _ | TIfdefVersion _ -> true
  | TCppDirectiveOther _ -> true
  | _ -> false
86
87
(* True for the asm, inline, attribute and typeof keyword tokens. *)
let is_gcc_token tok =
  match tok with
  | Tasm _ | Tinline _ | Tattribute _ | Ttypeof _ -> true
  | _ -> false
95
96
97
98
0708f913 99(* ---------------------------------------------------------------------- *)
34e49164
C
(* Opening parenthesis, either TOPar or TOParDefine. *)
let is_opar tok =
  match tok with
  | TOPar _ -> true
  | TOParDefine _ -> true
  | _ -> false
103
(* Closing parenthesis, either TCPar or TCParEOL. *)
let is_cpar tok =
  match tok with
  | TCPar _ -> true
  | TCParEOL _ -> true
  | _ -> false
107
485bce71
C
108
(* Opening brace, either TOBrace or TOBraceDefineInit. *)
let is_obrace tok =
  match tok with
  | TOBrace _ -> true
  | TOBraceDefineInit _ -> true
  | _ -> false
112
(* Closing brace. *)
let is_cbrace tok =
  match tok with
  | TCBrace _ -> true
  | _ -> false
485bce71
C
116
117
118
119
0708f913 120(* ---------------------------------------------------------------------- *)
708f4980
C
121
(* end of file: true only for the EOF token *)
let is_eof tok =
  match tok with
  | EOF _ -> true
  | _ -> false
126
485bce71 127
708f4980
C
(* end of macro: true only for the TDefEOL token *)
let is_eom tok =
  match tok with
  | TDefEOL _ -> true
  | _ -> false
485bce71 132
(* True for the statement keyword tokens listed below, plus TPtVirg and
 * TMacroIterator.
 *)
let is_statement tok =
  match tok with
  | Tfor _ | Tdo _ | Twhile _ -> true
  | Tif _ | Telse _ | Tswitch _ | Tcase _ -> true
  | Treturn _ | Tbreak _ | Tcontinue _ | Tgoto _ -> true
  | TPtVirg _ -> true
  | TMacroIterator _ -> true
  | _ -> false
141
(* is_start_of_something is used in parse_c for error recovery, to find
 * a synchronisation token.
 *
 * We would like to include TIdent or TDefine, TIfdef, but they can be in
 * the middle of a function, for instance with label:.
 *
 * Could put Typedefident: it would work in error recovery on the
 * already_passed tokens, which have already gone through the
 * Parsing_hacks.lookahead machinery, but it will not work on the
 * "next" tokens. Moreover, because the namespace for labels is different
 * from the namespace for ident/typedef, the name of a typedef can be used
 * for a label, so it is dangerous to put Typedefident at true here.
 *
 * Can look in parser_c.output to know what can be at toplevel
 * at the very beginning.
 *)

let is_start_of_something tok =
  match tok with
  (* base types *)
  | Tchar _ | Tshort _ | Tint _ | Tlong _ | Tfloat _ | Tdouble _ -> true
  | Tunsigned _ | Tsigned _ | Tvoid _ -> true
  | Tsize_t _ | Tssize_t _ | Tptrdiff_t _ -> true
  (* storage classes and qualifiers *)
  | Tauto _ | Tregister _ | Textern _ | Tstatic _ -> true
  | Tconst _ | Tvolatile _ -> true
  (* type introducers *)
  | Ttypedef _ | Tstruct _ | Tunion _ | Tenum _ -> true
  | _ -> false
168
169
170
(* True for the logical, bitwise, comparison, shift and arithmetic
 * operator tokens listed below (TMin and TMax included).
 *)
let is_binary_operator tok =
  match tok with
  | TOrLog _ | TAndLog _ -> true
  | TOr _ | TXor _ | TAnd _ -> true
  | TEqEq _ | TNotEq _ -> true
  | TInf _ | TSup _ | TInfEq _ | TSupEq _ -> true
  | TShl _ | TShr _ -> true
  | TPlus _ | TMinus _ | TMul _ | TDiv _ | TMod _ -> true
  | TMin _ | TMax _ -> true
  | _ -> false
34e49164
C
178
(* True for if, while, switch, typeof and TMacroIterator tokens. *)
let is_stuff_taking_parenthized tok =
  match tok with
  | Tif _ | Twhile _ | Tswitch _ -> true
  | Ttypeof _ -> true
  | TMacroIterator _ -> true
  | _ -> false
187
91eba41f 188
(* used in the algorithms for "10 most problematic errors" *)
let is_ident_like tok =
  match tok with
  (* plain identifiers and their define/typedef variants *)
  | TIdent _ | TKRParam _ | TypedefIdent _ -> true
  | TIdentDefine _ | TDefParamVariadic _ -> true
  | TUnknown _ -> true
  (* identifiers reclassified as macros *)
  | TMacroAttr _ | TMacroAttrStorage _ -> true
  | TMacroStmt _ | TMacroString _ -> true
  | TMacroDecl _ | TMacroDeclConst _ -> true
  | TMacroIterator _ -> true
  | _ -> false
91eba41f
C
209
210
34e49164
C
211(*****************************************************************************)
212(* Visitors *)
213(*****************************************************************************)
214
(* Because ocamlyacc forces us to do it that way. The ocamlyacc token
 * cannot be a pair of a sum type, it must be directly a sum type.
 *
 * info_of_tok returns the info carried by a token, whatever its
 * constructor. The match has no wildcard arm, so each constructor is
 * listed explicitly.
 *)
let info_of_tok tok =
  match tok with
  (* constructors carrying (payload, info) *)
  | TString (_, ii) | TChar (_, ii) | TFloat (_, ii) | TInt (_, ii)
  | TAssign (_, ii)
  | TIdent (_, ii) | TKRParam (_, ii) | Tconstructorname (_, ii)
  | TypedefIdent (_, ii)
  | TIncludeFilename (_, ii)
  | TIdentDefine (_, ii) | TDefParamVariadic (_, ii)
  | TMacroIdentBuilder (_, ii)
  | TMacroAttr (_, ii) | TMacroAttrStorage (_, ii)
  | TMacroStmt (_, ii) | TMacroString (_, ii)
  | TMacroDecl (_, ii) | TMacroIterator (_, ii)
  (* TMacroTop (s, i) used to be handled here as well *)
  | TCommentCpp (_, ii)
  | TIfdef (_, ii) | TIfdefelse (_, ii) | TIfdefelif (_, ii)
  | TEndif (_, ii)
    -> ii

  (* constructors where the info is not in second position of a pair *)
  | TInclude (_, _, _, ii) -> ii
  | TIncludeStart (ii, _) -> ii
  | TIfdefBool (_, _, ii) | TIfdefMisc (_, _, ii)
  | TIfdefVersion (_, _, ii)
    -> ii

  (* cpp related tokens carrying only the info *)
  | TDefine ii | TUndef ii | TCppDirectiveOther ii
  | TDefEOL ii | TOParDefine ii | TCppEscapedNewline ii
  | TCppConcatOp ii | TOBraceDefineInit ii
  | TUnknown ii
  | TMacroDeclConst ii
  | TCParEOL ii
  | TAction ii
    -> ii

  (* comments and layout *)
  | TComment ii | TCommentSpace ii | TCommentNewline ii
  | TCommentMisc ii
  | TCommentSkipTagStart ii | TCommentSkipTagEnd ii
    -> ii

  (* punctuation and operators *)
  | TOPar ii | TCPar ii | TOBrace ii | TCBrace ii | TOCro ii | TCCro ii
  | TDot ii | TComma ii | TPtrOp ii
  | TInc ii | TDec ii | TEq ii
  | TWhy ii | TTilde ii | TBang ii
  | TEllipsis ii | TDotDot ii | TPtVirg ii
  | TOrLog ii | TAndLog ii | TOr ii | TXor ii | TAnd ii
  | TEqEq ii | TNotEq ii | TInf ii | TSup ii | TInfEq ii | TSupEq ii
  | TShl ii | TShr ii
  | TPlus ii | TMinus ii | TMul ii | TDiv ii | TMod ii
  | TMax ii | TMin ii
    -> ii

  (* type, storage and qualifier keywords *)
  | Tchar ii | Tshort ii | Tint ii | Tdouble ii | Tfloat ii | Tlong ii
  | Tunsigned ii | Tsigned ii | Tvoid ii
  | Tsize_t ii | Tssize_t ii | Tptrdiff_t ii
  | Tauto ii | Tregister ii | Textern ii | Tstatic ii
  | Tconst ii | Tvolatile ii
  | Trestrict ii
  | Tstruct ii | Tenum ii | Ttypedef ii | Tunion ii
    -> ii

  (* statement and other keywords *)
  | Tbreak ii | Telse ii | Tswitch ii | Tcase ii | Tcontinue ii
  | Tfor ii | Tdo ii | Tif ii | Twhile ii
  | Treturn ii | Tgoto ii | Tdefault ii
  | Tsizeof ii
  | Tasm ii | Tattribute ii | TattributeNoarg ii
  | Tinline ii | Ttypeof ii
  | Tnew ii | Tdelete ii | TOParCplusplusInit ii
  | Tnamespace ii
    -> ii

  | EOF ii -> ii
ae4735db 371
34e49164
C
372
373
485bce71 374
34e49164
C
(* used by tokens to complete the parse_info with filename, line, col infos
 *
 * visitor_info_of_tok f tok rebuilds tok with the same constructor and
 * the same payload, except that its info is replaced by f applied to
 * the original info.
 *)
let visitor_info_of_tok f tok =
  match tok with
  (* literals and assignment *)
  | TString (payload, ii) -> TString (payload, f ii)
  | TChar (payload, ii) -> TChar (payload, f ii)
  | TFloat (payload, ii) -> TFloat (payload, f ii)
  | TInt (str, ii) -> TInt (str, f ii)
  | TAssign (op, ii) -> TAssign (op, f ii)

  (* identifiers *)
  | TIdent (name, ii) -> TIdent (name, f ii)
  | TKRParam (name, ii) -> TKRParam (name, f ii)
  | Tconstructorname (name, ii) -> Tconstructorname (name, f ii)
  | TypedefIdent (name, ii) -> TypedefIdent (name, f ii)

  (* cpp directives *)
  | TDefine ii -> TDefine (f ii)
  | TUndef ii -> TUndef (f ii)
  | TCppDirectiveOther ii -> TCppDirectiveOther (f ii)
  | TInclude (includes, filename, inifdef, ii) ->
      TInclude (includes, filename, inifdef, f ii)
  | TIncludeStart (ii, inifdef) -> TIncludeStart (f ii, inifdef)
  | TIncludeFilename (name, ii) -> TIncludeFilename (name, f ii)

  (* tokens specific to macro definitions *)
  | TCppEscapedNewline ii -> TCppEscapedNewline (f ii)
  | TDefEOL ii -> TDefEOL (f ii)
  | TCppConcatOp ii -> TCppConcatOp (f ii)
  | TOParDefine ii -> TOParDefine (f ii)
  | TIdentDefine (name, ii) -> TIdentDefine (name, f ii)
  | TDefParamVariadic (name, ii) -> TDefParamVariadic (name, f ii)
  | TOBraceDefineInit ii -> TOBraceDefineInit (f ii)

  | TUnknown ii -> TUnknown (f ii)

  (* macro-like identifiers *)
  | TMacroIdentBuilder (name, ii) -> TMacroIdentBuilder (name, f ii)
  | TMacroAttr (name, ii) -> TMacroAttr (name, f ii)
  | TMacroAttrStorage (name, ii) -> TMacroAttrStorage (name, f ii)
  | TMacroStmt (name, ii) -> TMacroStmt (name, f ii)
  | TMacroString (name, ii) -> TMacroString (name, f ii)
  | TMacroDecl (name, ii) -> TMacroDecl (name, f ii)
  | TMacroDeclConst ii -> TMacroDeclConst (f ii)
  | TMacroIterator (name, ii) -> TMacroIterator (name, f ii)
  (* TMacroTop (s, i) -> TMacroTop (s, f i) used to be handled here *)
  | TCParEOL ii -> TCParEOL (f ii)

  | TAction ii -> TAction (f ii)

  (* comments and layout *)
  | TComment ii -> TComment (f ii)
  | TCommentSpace ii -> TCommentSpace (f ii)
  | TCommentNewline ii -> TCommentNewline (f ii)
  | TCommentCpp (cppkind, ii) -> TCommentCpp (cppkind, f ii)
  | TCommentMisc ii -> TCommentMisc (f ii)
  | TCommentSkipTagStart ii -> TCommentSkipTagStart (f ii)
  | TCommentSkipTagEnd ii -> TCommentSkipTagEnd (f ii)

  (* ifdef family *)
  | TIfdef (tag, ii) -> TIfdef (tag, f ii)
  | TIfdefelse (tag, ii) -> TIfdefelse (tag, f ii)
  | TIfdefelif (tag, ii) -> TIfdefelif (tag, f ii)
  | TEndif (tag, ii) -> TEndif (tag, f ii)
  | TIfdefBool (flag, tag, ii) -> TIfdefBool (flag, tag, f ii)
  | TIfdefMisc (flag, tag, ii) -> TIfdefMisc (flag, tag, f ii)
  | TIfdefVersion (flag, tag, ii) -> TIfdefVersion (flag, tag, f ii)

  (* punctuation *)
  | TOPar ii -> TOPar (f ii)
  | TCPar ii -> TCPar (f ii)
  | TOBrace ii -> TOBrace (f ii)
  | TCBrace ii -> TCBrace (f ii)
  | TOCro ii -> TOCro (f ii)
  | TCCro ii -> TCCro (f ii)
  | TDot ii -> TDot (f ii)
  | TComma ii -> TComma (f ii)
  | TPtrOp ii -> TPtrOp (f ii)
  | TInc ii -> TInc (f ii)
  | TDec ii -> TDec (f ii)
  | TEq ii -> TEq (f ii)
  | TWhy ii -> TWhy (f ii)
  | TTilde ii -> TTilde (f ii)
  | TBang ii -> TBang (f ii)
  | TEllipsis ii -> TEllipsis (f ii)
  | TDotDot ii -> TDotDot (f ii)
  | TPtVirg ii -> TPtVirg (f ii)

  (* operators *)
  | TOrLog ii -> TOrLog (f ii)
  | TAndLog ii -> TAndLog (f ii)
  | TOr ii -> TOr (f ii)
  | TXor ii -> TXor (f ii)
  | TAnd ii -> TAnd (f ii)
  | TEqEq ii -> TEqEq (f ii)
  | TNotEq ii -> TNotEq (f ii)
  | TInf ii -> TInf (f ii)
  | TSup ii -> TSup (f ii)
  | TInfEq ii -> TInfEq (f ii)
  | TSupEq ii -> TSupEq (f ii)
  | TShl ii -> TShl (f ii)
  | TShr ii -> TShr (f ii)
  | TPlus ii -> TPlus (f ii)
  | TMinus ii -> TMinus (f ii)
  | TMul ii -> TMul (f ii)
  | TDiv ii -> TDiv (f ii)
  | TMod ii -> TMod (f ii)
  | TMax ii -> TMax (f ii)
  | TMin ii -> TMin (f ii)

  (* base types *)
  | Tchar ii -> Tchar (f ii)
  | Tshort ii -> Tshort (f ii)
  | Tint ii -> Tint (f ii)
  | Tdouble ii -> Tdouble (f ii)
  | Tfloat ii -> Tfloat (f ii)
  | Tlong ii -> Tlong (f ii)
  | Tunsigned ii -> Tunsigned (f ii)
  | Tsigned ii -> Tsigned (f ii)
  | Tvoid ii -> Tvoid (f ii)
  | Tsize_t ii -> Tsize_t (f ii)
  | Tssize_t ii -> Tssize_t (f ii)
  | Tptrdiff_t ii -> Tptrdiff_t (f ii)

  (* storage classes and qualifiers *)
  | Tauto ii -> Tauto (f ii)
  | Tregister ii -> Tregister (f ii)
  | Textern ii -> Textern (f ii)
  | Tstatic ii -> Tstatic (f ii)
  | Tconst ii -> Tconst (f ii)
  | Tvolatile ii -> Tvolatile (f ii)
  | Trestrict ii -> Trestrict (f ii)

  (* type introducers *)
  | Tstruct ii -> Tstruct (f ii)
  | Tenum ii -> Tenum (f ii)
  | Ttypedef ii -> Ttypedef (f ii)
  | Tunion ii -> Tunion (f ii)

  (* statement keywords *)
  | Tbreak ii -> Tbreak (f ii)
  | Telse ii -> Telse (f ii)
  | Tswitch ii -> Tswitch (f ii)
  | Tcase ii -> Tcase (f ii)
  | Tcontinue ii -> Tcontinue (f ii)
  | Tfor ii -> Tfor (f ii)
  | Tdo ii -> Tdo (f ii)
  | Tif ii -> Tif (f ii)
  | Twhile ii -> Twhile (f ii)
  | Treturn ii -> Treturn (f ii)
  | Tgoto ii -> Tgoto (f ii)
  | Tdefault ii -> Tdefault (f ii)

  (* remaining keywords *)
  | Tsizeof ii -> Tsizeof (f ii)
  | Tasm ii -> Tasm (f ii)
  | Tattribute ii -> Tattribute (f ii)
  | TattributeNoarg ii -> TattributeNoarg (f ii)
  | Tinline ii -> Tinline (f ii)
  | Ttypeof ii -> Ttypeof (f ii)
  | Tnew ii -> Tnew (f ii)
  | Tdelete ii -> Tdelete (f ii)
  | TOParCplusplusInit ii -> TOParCplusplusInit (f ii)
  | Tnamespace ii -> Tnamespace (f ii)

  | EOF ii -> EOF (f ii)
ae4735db 531
34e49164
C
532
533(*****************************************************************************)
534(* Accessors *)
535(*****************************************************************************)
536
(* (line, column) pair of a token, read from its info via Ast_c. *)
let linecol_of_tok tok =
  let ii = info_of_tok tok in
  (Ast_c.line_of_info ii, Ast_c.col_of_info ii)
540
(* Column component of linecol_of_tok. *)
let col_of_tok tok =
  let (_, col) = linecol_of_tok tok in
  col

(* Line component of linecol_of_tok. *)
let line_of_tok tok =
  let (line, _) = linecol_of_tok tok in
  line

(* The accessors below apply the matching Ast_c function to the info
 * carried by the token.
 *)
let pos_of_tok tok =
  let ii = info_of_tok tok in
  Ast_c.opos_of_info ii

let str_of_tok tok =
  let ii = info_of_tok tok in
  Ast_c.str_of_info ii

let file_of_tok tok =
  let ii = info_of_tok tok in
  Ast_c.file_of_info ii

let pinfo_of_tok tok =
  let ii = info_of_tok tok in
  Ast_c.pinfo_of_info ii
547
(* Each predicate below tests which Ast_c parse-info constructor
 * pinfo_of_tok returns for the token.
 *)
let is_origin tok =
  match pinfo_of_tok tok with
  | Ast_c.OriginTok _ -> true
  | _ -> false

let is_expanded tok =
  match pinfo_of_tok tok with
  | Ast_c.ExpandedTok _ -> true
  | _ -> false

let is_fake tok =
  match pinfo_of_tok tok with
  | Ast_c.FakeTok _ -> true
  | _ -> false

let is_abstract tok =
  match pinfo_of_tok tok with
  | Ast_c.AbstractLineTok _ -> true
  | _ -> false
91eba41f
C
556
557(*****************************************************************************)
558(* Helpers *)
559(*****************************************************************************)
ae4735db
C
(* True when the token's line is [line], [line - 1] or [line - 2]. *)
let is_same_line_or_close line tok =
  let tok_line = line_of_tok tok in
  let at delta = tok_line =|= line - delta in
  at 0 || at 1 || at 2
0708f913 564