[bpt/coccinelle.git] / parsing_c / lexer_c.mll

{
(* Yoann Padioleau
 *
 * Copyright (C) 2002, 2006, 2007, 2008, 2009, Ecole des Mines de Nantes
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License (GPL)
 * version 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * file license.txt for more details.
 *)
open Common

open Parser_c

open Ast_c (* to factorise tokens, OpAssign, ... *)

(*****************************************************************************)
(*
 * subtil: ocamllex use side effect on lexbuf, so must take care.
 * For instance must do
 *
 *  let info = tokinfo lexbuf in
 *  TComment (info +> tok_add_s (comment lexbuf))
 *
 * and not
 *
 *   TComment (tokinfo lexbuf +> tok_add_s (comment lexbuf))
 *
 * because of the "wierd" order of evaluation of OCaml.
 *
 *
 *
 * note: can't use Lexer_parser._lexer_hint here to do different
 * things, because now we call the lexer to get all the tokens
 * (tokens_all), and then we parse. So we can't have the _lexer_hint
 * info here. We can have it only in parse_c. For the same reason, the
 * typedef handling here is now useless.
 *)
(*****************************************************************************)

(*****************************************************************************)
(* Wrappers *)
(*****************************************************************************)
let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_lexing

(*****************************************************************************)


exception Lexical of string

let tok     lexbuf  = Lexing.lexeme lexbuf

let tokinfo lexbuf  =
  {
    pinfo = Ast_c.OriginTok {
      Common.charpos = Lexing.lexeme_start lexbuf;
      Common.str     = Lexing.lexeme lexbuf;
      (* info filled in a post-lexing phase *)
      Common.line = -1;
      Common.column = -1;
      Common.file = "";
    };
   (* must generate a new ref each time, otherwise share *)
    cocci_tag = ref Ast_c.emptyAnnot;
    annots_tag = Token_annot.empty;
    comments_tag = ref Ast_c.emptyComments;
  }

(* cppext: must generate a new ref each time, otherwise share *)
let no_ifdef_mark () = ref (None: (int * int) option)

let tok_add_s s ii = Ast_c.rewrap_str ((Ast_c.str_of_info ii) ^ s) ii


(* opti: less convenient, but using a hash is faster than using a match *)
let keyword_table = Common.hash_of_list [

  (* c: *)
  "void",   (fun ii -> Tvoid ii);
  "char",   (fun ii -> Tchar ii);
  "short",  (fun ii -> Tshort ii);
  "int",    (fun ii -> Tint ii);
  "long",   (fun ii -> Tlong ii);
  "float",  (fun ii -> Tfloat ii);
  "double", (fun ii -> Tdouble ii);
  "size_t", (fun ii -> Tsize_t ii);
  "ssize_t", (fun ii -> Tssize_t ii);
  "ptrdiff_t", (fun ii -> Tptrdiff_t ii);

  "unsigned", (fun ii -> Tunsigned ii);
  "signed",   (fun ii -> Tsigned ii);

  "auto",     (fun ii -> Tauto ii);
  "register", (fun ii -> Tregister ii);
  "extern",   (fun ii -> Textern ii);
  "static",   (fun ii -> Tstatic ii);

  "const",    (fun ii -> Tconst ii);
  "volatile", (fun ii -> Tvolatile ii);

  "struct",  (fun ii -> Tstruct ii);
  "union",   (fun ii -> Tunion ii);
  "enum",    (fun ii -> Tenum ii);
  "typedef", (fun ii -> Ttypedef ii);

  "if",      (fun ii -> Tif ii);
  "else",     (fun ii -> Telse ii);
  "break",   (fun ii -> Tbreak ii);
  "continue", (fun ii -> Tcontinue ii);
  "switch",  (fun ii -> Tswitch ii);
  "case",     (fun ii -> Tcase ii);
  "default", (fun ii -> Tdefault ii);
  "for",     (fun ii -> Tfor ii);
  "do",      (fun ii -> Tdo ii);
  "while",   (fun ii -> Twhile ii);
  "return",  (fun ii -> Treturn ii);
  "goto",    (fun ii -> Tgoto ii);

  "sizeof", (fun ii -> Tsizeof ii);


  (* gccext: cppext: linuxext: synonyms *)
  "asm",     (fun ii -> Tasm ii);
  "__asm__", (fun ii -> Tasm ii);
  "__asm",   (fun ii -> Tasm ii);

  "inline",     (fun ii -> Tinline ii);
  "__inline__", (fun ii -> Tinline ii);
  "__inline",   (fun ii -> Tinline ii);

  "__attribute__", (fun ii -> Tattribute ii);
  "__attribute", (fun ii -> Tattribute ii);

  "typeof", (fun ii -> Ttypeof ii);
  "__typeof__", (fun ii -> Ttypeof ii);
  "__typeof", (fun ii -> Ttypeof ii);

        (* found a lot in expanded code *)
  "__extension__", (fun ii -> TattributeNoarg ii);


  (* gccext: alias *)
  "__signed__",     (fun ii -> Tsigned ii);

  "__const__",     (fun ii -> Tconst ii);
  "__const",     (fun ii -> Tconst ii);

  "__volatile__",  (fun ii -> Tvolatile ii);
  "__volatile",    (fun ii -> Tvolatile ii);

  (* windowsext: *)
  "__declspec", (fun ii -> Tattribute ii);

  "__stdcall", (fun ii -> TattributeNoarg ii);
  "__cdecl", (fun ii -> TattributeNoarg ii);
  "WINAPI", (fun ii -> TattributeNoarg ii);
  "APIENTRY", (fun ii -> TattributeNoarg ii);
  "CALLBACK", (fun ii -> TattributeNoarg ii);

  (* c99:  *)
  (* no just "restrict" ? maybe for backward compatibility they avoided
   * to use restrict which people may have used in their program already
   *)
  "__restrict",    (fun ii -> Trestrict ii);
  "__restrict__",    (fun ii -> Trestrict ii);

 ]

let cpp_keyword_table = Common.hash_of_list [
  "namespace", (fun ii -> Tnamespace ii);
  "new",       (fun ii -> Tnew ii);
  "delete",    (fun ii -> Tdelete ii);
  "using",     (fun ii -> TComment ii) ]

let error_radix s =
  ("numeric " ^ s ^ " constant contains digits beyond the radix:")

(* julia: functions for figuring out the type of integers *)

let is_long_dec s int uint long ulong =
  match !Flag_parsing_c.int_thresholds with
    None -> int
  | Some (_,_,uint_threshold,long_threshold,ulong_threshold) ->
      let bn = Big_int.big_int_of_string s in
      if Big_int.ge_big_int bn ulong_threshold
      then ulong
      else
	if Big_int.ge_big_int bn long_threshold
	then long
	else
	  if Big_int.ge_big_int bn uint_threshold
	  then long
	  else int

let is_long_ho s int uint long ulong drop bpd count =
  match !Flag_parsing_c.int_thresholds with
    None -> int
  | Some (uint_sz,ulong_sz,_,_,_) ->
      let len = String.length s in
      (* this assumes that all of the hex/oct digits are significant *)
      (* drop is 2 for hex (0x) and 1 for oct (0) *)
      let s = String.sub s drop (len - drop) in
      let len =
	((len-drop) * bpd) -
	  (count (int_of_string("0x"^(String.sub s 0 1)))) in
      if len < uint_sz
      then int
      else
	if len = uint_sz
	then uint
	else
	  if len < ulong_sz
	  then long
	  else ulong

let is_long_oct s int uint long ulong =
  is_long_ho s int uint long ulong 1 3
    (* stupid, but probably more efficient than taking logs *)
    (function 0 -> 3 | 1 -> 2 | n when n < 4 -> 1 | _ -> 0)
let is_long_hex s int uint long ulong =
  is_long_ho s int uint long ulong 2 4
    (* stupid, but probably more efficient than taking logs *)
    (function 0 -> 4 | 1 -> 3 | n when n < 4 -> 2 | n when n < 8 -> 1
      | _ -> 0)

let sint = (Signed,CInt)
let uint = (UnSigned,CInt)
let slong = (Signed,CLong)
let ulong = (UnSigned,CLong)

}

(*****************************************************************************)
let letter = ['A'-'Z' 'a'-'z' '_']
let extended_letter = ['A'-'Z' 'a'-'z' '_' ':' '<' '>' '~'](*for c++, not used*)
let digit  = ['0'-'9']

let cplusplus_ident = (letter | '$') (letter | digit | '$') *
let cplusplus_ident_ext = (letter | '~' | '$') (letter | digit | '~' | '$') *

(* not used for the moment *)
let punctuation = ['!' '\"' '#' '%' '&' '\'' '(' ')' '*' '+' ',' '-' '.' '/' ':'
		   ';' '<' '=' '>' '?' '[' '\\' ']' '^' '{' '|' '}' '~']  
let space = [' ' '\t' '\n' '\r' '\011' '\012' ]
let additionnal = [ ' ' '\b' '\t' '\011' '\n' '\r' '\007' ]
(* 7 = \a = bell in C. this is not the only char allowed !!
 * ex @ and $ ` are valid too
 *)

let cchar = (letter | digit | punctuation | additionnal)

let sp = [' ' '\t']+
let spopt = [' ' '\t']*

let dec = ['0'-'9']
let oct = ['0'-'7']
let hex = ['0'-'9' 'a'-'f' 'A'-'F']

let decimal = ('0' | (['1'-'9'] dec*))
let octal   = ['0']        oct+
let hexa    = ("0x" |"0X") hex+


let pent   = dec+
let pfract = dec+
let sign = ['-' '+']
let exp  = ['e''E'] sign? dec+
let real = pent exp | ((pent? '.' pfract | pent '.' pfract? ) exp?)

let id = letter (letter | digit) *

(*****************************************************************************)
rule token = parse

  (* ----------------------------------------------------------------------- *)
  (* spacing/comments *)
  (* ----------------------------------------------------------------------- *)

  (* note: this lexer generate tokens for comments!! so can not give
   * this lexer as-is to the parsing function. The caller must preprocess
   * it, e.g. by using techniques like cur_tok ref in parse_c.ml.
   *
   * update: we now also generate a separate token for newlines, so now
   * the caller may also have to reagglomerate all those commentspace
   * tokens if he was assuming that spaces were agglomerate in a single
   * token.
   *)

  | ['\n'] [' ' '\t' '\r' '\011' '\012' ]*
      (* starting a new line; the newline character followed by whitespace *)
      { TCommentNewline (tokinfo lexbuf) }
  | [' ' '\t' '\r' '\011' '\012' ]+
      { TCommentSpace (tokinfo lexbuf) }
  | "/*"
      { let info = tokinfo lexbuf in
        let com = comment lexbuf in

        let info' = info +> tok_add_s com in
        let s = Ast_c.str_of_info info' in
        (* could be more flexible, use [\t ]* instead of hardcoded
         * single space. *)
        match s with
        | "/* {{coccinelle:skip_start}} */" ->
            TCommentSkipTagStart (info')
        | "/* {{coccinelle:skip_end}} */" ->
            TCommentSkipTagEnd (info')
        | _ -> TComment(info')
      }


  (* C++ comment are allowed via gccext, but normally they are deleted by cpp.
   * So need this here only when dont call cpp before.
   * note that we don't keep the trailing \n; it will be in another token.
   *)
  | "//" [^'\r' '\n' '\011']*    { TComment (tokinfo lexbuf) }

  (* ----------------------------------------------------------------------- *)
  (* cpp *)
  (* ----------------------------------------------------------------------- *)

  (* old:
   *   | '#'		{ endline lexbuf} // should be line, and not endline
   *   and endline = parse  | '\n' 	{ token lexbuf}
   *                        |	_	{ endline lexbuf}
   *)

  (* less?:
   *  have found a # #else  in "newfile-2.6.c",  legal ?   and also a  #/* ...
   *    => just "#" -> token {lexbuf} (that is ignore)
   *  il y'a 1 #elif  sans rien  apres
   *  il y'a 1 #error sans rien  apres
   *  il y'a 2  mov dede, #xxx    qui genere du coup exn car
Commit	Line	Data
34e49164	1	{
0708f913	2	(* Yoann Padioleau
ae4735db	3	*
0708f913	4	* Copyright (C) 2002, 2006, 2007, 2008, 2009, Ecole des Mines de Nantes
34e49164 C	5	*
	6	* This program is free software; you can redistribute it and/or
	7	* modify it under the terms of the GNU General Public License (GPL)
	8	* version 2 as published by the Free Software Foundation.
ae4735db	9	*
34e49164 C	10	* This program is distributed in the hope that it will be useful,
	11	* but WITHOUT ANY WARRANTY; without even the implied warranty of
	12	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	13	* file license.txt for more details.
	14	*)
	15	open Common
	16
	17	open Parser_c
	18
	19	open Ast_c (* to factorise tokens, OpAssign, ... *)
	20
	21	(*****************************************************************************)
	22	(*
ae4735db C	23	* subtil: ocamllex use side effect on lexbuf, so must take care.
	24	* For instance must do
	25	*
	26	* let info = tokinfo lexbuf in
	27	* TComment (info +> tok_add_s (comment lexbuf))
	28	*
	29	* and not
	30	*
	31	* TComment (tokinfo lexbuf +> tok_add_s (comment lexbuf))
	32	*
34e49164	33	* because of the "wierd" order of evaluation of OCaml.
ae4735db C	34	*
ae4735db C	35	*
34e49164 C	36	*
	37	* note: can't use Lexer_parser._lexer_hint here to do different
	38	* things, because now we call the lexer to get all the tokens
	39	* (tokens_all), and then we parse. So we can't have the _lexer_hint
	40	* info here. We can have it only in parse_c. For the same reason, the
ae4735db	41	* typedef handling here is now useless.
34e49164 C	42	*)
	43	(*****************************************************************************)
	44
	45	(*****************************************************************************)
	46	(* Wrappers *)
	47	(*****************************************************************************)
ae4735db	48	let pr2, pr2_once = Common.mk_pr2_wrappers Flag_parsing_c.verbose_lexing
34e49164 C	49
	50	(*****************************************************************************)
	51
	52
	53	exception Lexical of string
	54
	55	let tok lexbuf = Lexing.lexeme lexbuf
	56
ae4735db C	57	let tokinfo lexbuf =
ae4735db C	58	{
34e49164	59	pinfo = Ast_c.OriginTok {
ae4735db	60	Common.charpos = Lexing.lexeme_start lexbuf;
34e49164 C	61	Common.str = Lexing.lexeme lexbuf;
34e49164 C	62	(* info filled in a post-lexing phase *)
ae4735db C	63	Common.line = -1;
ae4735db C	64	Common.column = -1;
34e49164 C	65	Common.file = "";
	66	};
	67	(* must generate a new ref each time, otherwise share *)
	68	cocci_tag = ref Ast_c.emptyAnnot;
abad11c5	69	annots_tag = Token_annot.empty;
34e49164 C	70	comments_tag = ref Ast_c.emptyComments;
	71	}
	72
b1b2de81	73	(* cppext: must generate a new ref each time, otherwise share *)
485bce71 C	74	let no_ifdef_mark () = ref (None: (int * int) option)
485bce71 C	75
34e49164	76	let tok_add_s s ii = Ast_c.rewrap_str ((Ast_c.str_of_info ii) ^ s) ii
ae4735db	77
34e49164 C	78
	79	(* opti: less convenient, but using a hash is faster than using a match *)
	80	let keyword_table = Common.hash_of_list [
	81
485bce71	82	(* c: *)
ae4735db C	83	"void", (fun ii -> Tvoid ii);
	84	"char", (fun ii -> Tchar ii);
	85	"short", (fun ii -> Tshort ii);
	86	"int", (fun ii -> Tint ii);
	87	"long", (fun ii -> Tlong ii);
	88	"float", (fun ii -> Tfloat ii);
	89	"double", (fun ii -> Tdouble ii);
1eddfd50 C	90	"size_t", (fun ii -> Tsize_t ii);
	91	"ssize_t", (fun ii -> Tssize_t ii);
	92	"ptrdiff_t", (fun ii -> Tptrdiff_t ii);
ae4735db C	93
ae4735db C	94	"unsigned", (fun ii -> Tunsigned ii);
34e49164	95	"signed", (fun ii -> Tsigned ii);
ae4735db C	96
	97	"auto", (fun ii -> Tauto ii);
	98	"register", (fun ii -> Tregister ii);
	99	"extern", (fun ii -> Textern ii);
34e49164 C	100	"static", (fun ii -> Tstatic ii);
	101
	102	"const", (fun ii -> Tconst ii);
ae4735db C	103	"volatile", (fun ii -> Tvolatile ii);
	104
	105	"struct", (fun ii -> Tstruct ii);
	106	"union", (fun ii -> Tunion ii);
	107	"enum", (fun ii -> Tenum ii);
	108	"typedef", (fun ii -> Ttypedef ii);
	109
	110	"if", (fun ii -> Tif ii);
	111	"else", (fun ii -> Telse ii);
	112	"break", (fun ii -> Tbreak ii);
34e49164	113	"continue", (fun ii -> Tcontinue ii);
ae4735db C	114	"switch", (fun ii -> Tswitch ii);
	115	"case", (fun ii -> Tcase ii);
	116	"default", (fun ii -> Tdefault ii);
	117	"for", (fun ii -> Tfor ii);
	118	"do", (fun ii -> Tdo ii);
	119	"while", (fun ii -> Twhile ii);
34e49164	120	"return", (fun ii -> Treturn ii);
ae4735db C	121	"goto", (fun ii -> Tgoto ii);
	122
	123	"sizeof", (fun ii -> Tsizeof ii);
34e49164 C	124
	125
	126	(* gccext: cppext: linuxext: synonyms *)
	127	"asm", (fun ii -> Tasm ii);
	128	"__asm__", (fun ii -> Tasm ii);
	129	"__asm", (fun ii -> Tasm ii);
	130
	131	"inline", (fun ii -> Tinline ii);
	132	"__inline__", (fun ii -> Tinline ii);
	133	"__inline", (fun ii -> Tinline ii);
34e49164 C	134
	135	"__attribute__", (fun ii -> Tattribute ii);
	136	"__attribute", (fun ii -> Tattribute ii);
	137
	138	"typeof", (fun ii -> Ttypeof ii);
	139	"__typeof__", (fun ii -> Ttypeof ii);
485bce71 C	140	"__typeof", (fun ii -> Ttypeof ii);
485bce71 C	141
978fd7e5	142	(* found a lot in expanded code *)
ae4735db	143	"__extension__", (fun ii -> TattributeNoarg ii);
978fd7e5	144
34e49164	145
485bce71	146	(* gccext: alias *)
34e49164 C	147	"__signed__", (fun ii -> Tsigned ii);
	148
	149	"__const__", (fun ii -> Tconst ii);
	150	"__const", (fun ii -> Tconst ii);
	151
ae4735db C	152	"__volatile__", (fun ii -> Tvolatile ii);
ae4735db C	153	"__volatile", (fun ii -> Tvolatile ii);
485bce71	154
978fd7e5 C	155	(* windowsext: *)
	156	"__declspec", (fun ii -> Tattribute ii);
	157
	158	"__stdcall", (fun ii -> TattributeNoarg ii);
	159	"__cdecl", (fun ii -> TattributeNoarg ii);
	160	"WINAPI", (fun ii -> TattributeNoarg ii);
	161	"APIENTRY", (fun ii -> TattributeNoarg ii);
	162	"CALLBACK", (fun ii -> TattributeNoarg ii);
485bce71 C	163
485bce71 C	164	(* c99: *)
ae4735db C	165	(* no just "restrict" ? maybe for backward compatibility they avoided
ae4735db C	166	* to use restrict which people may have used in their program already
485bce71	167	*)
ae4735db C	168	"__restrict", (fun ii -> Trestrict ii);
	169	"__restrict__", (fun ii -> Trestrict ii);
	170
34e49164 C	171	]
34e49164 C	172
f59c9fb7	173	let cpp_keyword_table = Common.hash_of_list [
1b9ae606 C	174	"namespace", (fun ii -> Tnamespace ii);
	175	"new", (fun ii -> Tnew ii);
	176	"delete", (fun ii -> Tdelete ii);
	177	"using", (fun ii -> TComment ii) ]
f59c9fb7	178
ae4735db	179	let error_radix s =
34e49164 C	180	("numeric " ^ s ^ " constant contains digits beyond the radix:")
34e49164 C	181
978fd7e5	182	(* julia: functions for figuring out the type of integers *)
708f4980 C	183
	184	let is_long_dec s int uint long ulong =
	185	match !Flag_parsing_c.int_thresholds with
	186	None -> int
	187	\| Some (_,_,uint_threshold,long_threshold,ulong_threshold) ->
	188	let bn = Big_int.big_int_of_string s in
	189	if Big_int.ge_big_int bn ulong_threshold
	190	then ulong
	191	else
	192	if Big_int.ge_big_int bn long_threshold
	193	then long
	194	else
	195	if Big_int.ge_big_int bn uint_threshold
	196	then long
	197	else int
	198
	199	let is_long_ho s int uint long ulong drop bpd count =
	200	match !Flag_parsing_c.int_thresholds with
	201	None -> int
	202	\| Some (uint_sz,ulong_sz,_,_,_) ->
	203	let len = String.length s in
	204	(* this assumes that all of the hex/oct digits are significant *)
	205	(* drop is 2 for hex (0x) and 1 for oct (0) *)
	206	let s = String.sub s drop (len - drop) in
	207	let len =
	208	((len-drop) * bpd) -
	209	(count (int_of_string("0x"^(String.sub s 0 1)))) in
	210	if len < uint_sz
	211	then int
	212	else
	213	if len = uint_sz
	214	then uint
	215	else
	216	if len < ulong_sz
	217	then long
	218	else ulong
	219
	220	let is_long_oct s int uint long ulong =
	221	is_long_ho s int uint long ulong 1 3
	222	(* stupid, but probably more efficient than taking logs *)
	223	(function 0 -> 3 \| 1 -> 2 \| n when n < 4 -> 1 \| _ -> 0)
	224	let is_long_hex s int uint long ulong =
	225	is_long_ho s int uint long ulong 2 4
	226	(* stupid, but probably more efficient than taking logs *)
	227	(function 0 -> 4 \| 1 -> 3 \| n when n < 4 -> 2 \| n when n < 8 -> 1
	228	\| _ -> 0)
	229
	230	let sint = (Signed,CInt)
	231	let uint = (UnSigned,CInt)
	232	let slong = (Signed,CLong)
	233	let ulong = (UnSigned,CLong)
	234
34e49164 C	235	}
	236
	237	(*****************************************************************************)
	238	let letter = ['A'-'Z' 'a'-'z' '_']
3a314143	239	let extended_letter = ['A'-'Z' 'a'-'z' '_' ':' '<' '>' '~'](for c++, not used)
34e49164 C	240	let digit = ['0'-'9']
34e49164 C	241
1b9ae606 C	242	let cplusplus_ident = (letter \| '$') (letter \| digit \| '$') *
	243	let cplusplus_ident_ext = (letter \| '~' \| '$') (letter \| digit \| '~' \| '$') *
	244
34e49164	245	(* not used for the moment *)
abad11c5 C	246	let punctuation = ['!' '\"' '#' '%' '&' '\'' '(' ')' '*' '+' ',' '-' '.' '/' ':'
abad11c5 C	247	';' '<' '=' '>' '?' '[' '\\' ']' '^' '{' '\|' '}' '~']
34e49164	248	let space = [' ' '\t' '\n' '\r' '\011' '\012' ]
ae4735db C	249	let additionnal = [ ' ' '\b' '\t' '\011' '\n' '\r' '\007' ]
	250	(* 7 = \a = bell in C. this is not the only char allowed !!
	251	* ex @ and $ ` are valid too
34e49164 C	252	*)
34e49164 C	253
ae4735db	254	let cchar = (letter \| digit \| punctuation \| additionnal)
34e49164 C	255
	256	let sp = [' ' '\t']+
	257	let spopt = [' ' '\t']*
	258
	259	let dec = ['0'-'9']
	260	let oct = ['0'-'7']
	261	let hex = ['0'-'9' 'a'-'f' 'A'-'F']
	262
	263	let decimal = ('0' \| (['1'-'9'] dec*))
	264	let octal = ['0'] oct+
ae4735db	265	let hexa = ("0x" \|"0X") hex+
34e49164 C	266
	267
	268	let pent = dec+
	269	let pfract = dec+
	270	let sign = ['-' '+']
	271	let exp = ['e''E'] sign? dec+
	272	let real = pent exp \| ((pent? '.' pfract \| pent '.' pfract? ) exp?)
	273
	274	let id = letter (letter \| digit) *
	275
	276	(*****************************************************************************)
	277	rule token = parse
	278
	279	(* ----------------------------------------------------------------------- *)
	280	(* spacing/comments *)
	281	(* ----------------------------------------------------------------------- *)
	282
ae4735db C	283	(* note: this lexer generate tokens for comments!! so can not give
ae4735db C	284	* this lexer as-is to the parsing function. The caller must preprocess
978fd7e5	285	* it, e.g. by using techniques like cur_tok ref in parse_c.ml.
ae4735db	286	*
978fd7e5 C	287	* update: we now also generate a separate token for newlines, so now
	288	* the caller may also have to reagglomerate all those commentspace
	289	* tokens if he was assuming that spaces were agglomerate in a single
ae4735db	290	* token.
34e49164 C	291	*)
	292
	293	\| ['\n'] [' ' '\t' '\r' '\011' '\012' ]*
	294	(* starting a new line; the newline character followed by whitespace *)
	295	{ TCommentNewline (tokinfo lexbuf) }
ae4735db	296	\| [' ' '\t' '\r' '\011' '\012' ]+
34e49164	297	{ TCommentSpace (tokinfo lexbuf) }
ae4735db C	298	\| "/*"
ae4735db C	299	{ let info = tokinfo lexbuf in
34e49164	300	let com = comment lexbuf in
0708f913 C	301
	302	let info' = info +> tok_add_s com in
	303	let s = Ast_c.str_of_info info' in
ae4735db	304	(* could be more flexible, use [\t ]* instead of hardcoded
b1b2de81	305	* single space. *)
0708f913	306	match s with
ae4735db	307	\| "/* {{coccinelle:skip_start}} */" ->
0708f913	308	TCommentSkipTagStart (info')
ae4735db	309	\| "/* {{coccinelle:skip_end}} */" ->
0708f913	310	TCommentSkipTagEnd (info')
ae4735db	311	\| _ -> TComment(info')
34e49164 C	312	}
	313
	314
	315	(* C++ comment are allowed via gccext, but normally they are deleted by cpp.
	316	* So need this here only when dont call cpp before.
485bce71	317	* note that we don't keep the trailing \n; it will be in another token.
34e49164	318	*)
ae4735db	319	\| "//" [^'\r' '\n' '\011']* { TComment (tokinfo lexbuf) }
34e49164 C	320
	321	(* ----------------------------------------------------------------------- *)
	322	(* cpp *)
	323	(* ----------------------------------------------------------------------- *)
	324
	325	(* old:
ae4735db C	326	* \| '#' { endline lexbuf} // should be line, and not endline
	327	* and endline = parse \| '\n' { token lexbuf}
	328	* \| _ { endline lexbuf}
34e49164 C	329	*)
34e49164 C	330
b1b2de81	331	(* less?:
ae4735db	332	* have found a # #else in "newfile-2.6.c", legal ? and also a #/* ...
34e49164 C	333	* => just "#" -> token {lexbuf} (that is ignore)
	334	* il y'a 1 #elif sans rien apres
	335	* il y'a 1 #error sans rien apres
	336	* il y'a 2 mov dede, #xxx qui genere du coup exn car
	337