X-Git-Url: http://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/34e491640531bd81a0e2238fd599e1aafe53613e..9f8e26f459677a621822918b7539ae94214621ac:/parsing_cocci/lexer_cocci.mll

diff --git a/parsing_cocci/lexer_cocci.mll b/parsing_cocci/lexer_cocci.mll
index c066d1d..6abc9b1 100644
--- a/parsing_cocci/lexer_cocci.mll
+++ b/parsing_cocci/lexer_cocci.mll
@@ -1,23 +1,23 @@
 (*
-* Copyright 2005-2008, Ecole des Mines de Nantes, University of Copenhagen
-* Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller
-* This file is part of Coccinelle.
-*
-* Coccinelle is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, according to version 2 of the License.
-*
-* Coccinelle is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
-*
-* The authors reserve the right to distribute this or future versions of
-* Coccinelle under other licenses.
-*)
+ * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
+ * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
+ * This file is part of Coccinelle.
+ *
+ * Coccinelle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, according to version 2 of the License.
+ *
+ * Coccinelle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The authors reserve the right to distribute this or future versions of
+ * Coccinelle under other licenses.
+ *)

 {

@@ -48,8 +48,8 @@ let get_current_line_type lexbuf =
   let lex_start = Lexing.lexeme_start lexbuf in
   let preceeding_spaces =
     if !line_start < 0 then 0 else lex_start - !line_start in
-  line_start := -1;
-  prev_plus := (c = D.PLUS);
+  (*line_start := -1;*)
+  prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
   (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos)
 let current_line_started = ref false
 let col_zero = ref true
@@ -87,6 +87,8 @@ let add_current_line_type x =
       current_line_type := (D.OPTMINUS,ln,lln)
   | (D.PLUS,(D.CONTEXT,ln,lln)) ->
       current_line_type := (D.PLUS,ln,lln)
+  | (D.PLUSPLUS,(D.CONTEXT,ln,lln)) ->
+      current_line_type := (D.PLUSPLUS,ln,lln)
   | (D.UNIQUE,(D.CONTEXT,ln,lln)) ->
       current_line_type := (D.UNIQUE,ln,lln)
   | (D.OPT,(D.CONTEXT,ln,lln)) ->
@@ -95,7 +97,7 @@ let add_current_line_type x =

 let check_minus_context_linetype s =
   match !current_line_type with
-    (D.PLUS,_,_) -> lexerr "invalid in a + context: " s
+    (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> lexerr "invalid in a + context: " s
   | _ -> ()

 let check_context_linetype s =
@@ -105,17 +107,18 @@ let check_context_linetype s =

 let check_plus_linetype s =
   match !current_line_type with
-    (D.PLUS,_,_) -> ()
+    (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> ()
   | _ -> lexerr "invalid in a non + context: " s

 let check_arity_context_linetype s =
   match !current_line_type with
-    (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
+    (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.PLUSPLUS,_,_)
+  | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
   | _ -> lexerr "invalid in a nonempty context: " s

 let process_include start finish str =
   (match !current_line_type with
-    (D.PLUS,_,_) ->
+    (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
      (try
        let _ = Str.search_forward (Str.regexp "\\.\\.\\.") str start in
        lexerr "... not allowed in + include" ""
@@ -130,15 +133,20 @@ let pm = ref UNKNOWN

 let patch_or_match = function
     PATCH ->
-      (match !pm with
-        MATCH -> lexerr "- or + not allowed in the first column for a match" ""
-      | PATCH -> ()
-      | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
+      if not !D.ignore_patch_or_match
+      then
+        (match !pm with
+          MATCH ->
+            lexerr "- or + not allowed in the first column for a match" ""
+        | PATCH -> ()
+        | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
   | MATCH ->
-      (match !pm with
-        PATCH -> lexerr "* not allowed in the first column for a patch" ""
-      | MATCH -> ()
-      | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
+      if not !D.ignore_patch_or_match
+      then
+        (match !pm with
+          PATCH -> lexerr "* not allowed in the first column for a patch" ""
+        | MATCH -> ()
+        | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
   | _ -> failwith "unexpected argument"

 (* ---------------------------------------------------------------------- *)
@@ -168,7 +176,7 @@ let check_var s linetype =
      (try (Hashtbl.find type_names s) linetype
      with Not_found ->
        (try (Hashtbl.find declarer_names s) linetype
-       with Not_found ->
+       with Not_found ->
          (try (Hashtbl.find iterator_names s) linetype
          with Not_found -> TIdent (s,linetype)))) in
   if !Data.in_meta or !Data.in_rule_name
@@ -179,7 +187,7 @@ let id_tokens lexbuf =
   let s = tok lexbuf in
   let linetype = get_current_line_type lexbuf in
   let in_rule_name = !Data.in_rule_name in
-  let in_meta = !Data.in_meta in
+  let in_meta = !Data.in_meta && not !Data.saw_struct in
   let in_iso = !Data.in_iso in
   let in_prolog = !Data.in_prolog in
   match s with
@@ -187,8 +195,14 @@ let id_tokens lexbuf =
   | "type" when in_meta -> check_arity_context_linetype s; TType
   | "parameter" when in_meta -> check_arity_context_linetype s; TParameter
   | "constant" when in_meta -> check_arity_context_linetype s; TConstant
+  | "generated" when in_rule_name && not (!Flag.make_hrule = None) ->
+      check_arity_context_linetype s; TGenerated
   | "expression" when in_meta || in_rule_name ->
       check_arity_context_linetype s; TExpression
+  | "initialiser" when in_meta || in_rule_name ->
+      check_arity_context_linetype s; TInitialiser
+  | "initializer" when in_meta || in_rule_name ->
+      check_arity_context_linetype s; TInitialiser
   | "idexpression" when in_meta ->
       check_arity_context_linetype s; TIdExpression
   | "statement" when in_meta -> check_arity_context_linetype s; TStatement
@@ -210,15 +224,19 @@ let id_tokens lexbuf =
   | "words" when in_meta -> check_context_linetype s; TWords
   | "using" when in_rule_name || in_prolog ->
       check_context_linetype s; TUsing
+  | "virtual" when in_prolog -> check_context_linetype s; TVirtual
   | "disable" when in_rule_name -> check_context_linetype s; TDisable
   | "extends" when in_rule_name -> check_context_linetype s; TExtends
   | "depends" when in_rule_name -> check_context_linetype s; TDepends
   | "on" when in_rule_name -> check_context_linetype s; TOn
   | "ever" when in_rule_name -> check_context_linetype s; TEver
   | "never" when in_rule_name -> check_context_linetype s; TNever
+  (* exists and forall for when are reparsed in parse_cocci.ml *)
   | "exists" when in_rule_name -> check_context_linetype s; TExists
   | "forall" when in_rule_name -> check_context_linetype s; TForall
-  | "reverse" when in_rule_name -> check_context_linetype s; TReverse
+  | "script" when in_rule_name -> check_context_linetype s; TScript
+  | "initialize" when in_rule_name -> check_context_linetype s; TInitialize
+  | "finalize" when in_rule_name -> check_context_linetype s; TFinalize

   | "char" -> Tchar linetype
   | "short" -> Tshort linetype
@@ -227,11 +245,14 @@ let id_tokens lexbuf =
   | "float" -> Tfloat linetype
   | "long" -> Tlong linetype
   | "void" -> Tvoid linetype
-  | "struct" -> Tstruct linetype
-  | "union" -> Tunion linetype
+  (* in_meta is only for the first keyword; drop it now to allow any type
+     name *)
+  | "struct" -> Data.saw_struct := true; Tstruct linetype
+  | "union" -> Data.saw_struct := true; Tunion linetype
+  | "enum" -> Data.saw_struct := true; Tenum linetype
   | "unsigned" -> Tunsigned linetype
   | "signed" -> Tsigned linetype
-
+
   | "auto" -> Tauto linetype
   | "register" -> Tregister linetype
   | "extern" -> Textern linetype
@@ -281,21 +302,32 @@ let init _ =
   Data.in_rule_name := false;
   Data.in_meta := false;
   Data.in_prolog := false;
+  Data.saw_struct := false;
   Data.inheritable_positions := [];
   Hashtbl.clear all_metavariables;
   Hashtbl.clear Data.all_metadecls;
   Hashtbl.clear metavariables;
   Hashtbl.clear type_names;
   Hashtbl.clear rule_names;
+  Hashtbl.clear iterator_names;
+  Hashtbl.clear declarer_names;
   let get_name (_,x) = x in
   Data.add_id_meta :=
     (fun name constraints pure ->
       let fn clt = TMetaId(name,constraints,pure,clt) in
       Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_fresh_id_meta :=
+    (fun name ->
+      let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
   Data.add_type_meta :=
     (fun name pure ->
      let fn clt = TMetaType(name,pure,clt) in
      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_init_meta :=
+    (fun name pure ->
+      let fn clt = TMetaInit(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
   Data.add_param_meta :=
     (function name -> function pure ->
      let fn clt = TMetaParam(name,pure,clt) in
@@ -404,7 +436,7 @@ let hex = ['0'-'9' 'a'-'f' 'A'-'F']

 let decimal = ('0' | (['1'-'9'] dec*))
 let octal = ['0'] oct+
-let hexa = ("0x" |"0X") hex+
+let hexa = ("0x" |"0X") hex+

 let pent = dec+
 let pfract = dec+
@@ -425,6 +457,8 @@ rule token = parse
           then (start_line true; TArob)
           else (check_minus_context_linetype "@"; TPArob) }

+  | "~=" { start_line true; TTildeEq (get_current_line_type lexbuf) }
+  | "!~=" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
   | "WHEN" | "when"
       { start_line true; check_minus_context_linetype (tok lexbuf);
         TWhen (get_current_line_type lexbuf) }
@@ -516,7 +550,7 @@ rule token = parse
           then TMPtVirg (* works better with tokens_all *)
           else TPtVirg (get_current_line_type lexbuf) }

-
+
   | '*' { pass_zero();
           if !current_line_started
           then
@@ -525,78 +559,85 @@ rule token = parse
             (patch_or_match MATCH;
              add_current_line_type D.MINUS; token lexbuf) }
   | '/' { start_line true;
-          TDmOp (Ast.Div,get_current_line_type lexbuf) }
+          TDmOp (Ast.Div,get_current_line_type lexbuf) }
   | '%' { start_line true;
-          TDmOp (Ast.Mod,get_current_line_type lexbuf) }
-  | '~' { start_line true; TTilde (get_current_line_type lexbuf) }
-
-  | "++" { start_line true; TInc (get_current_line_type lexbuf) }
+          TDmOp (Ast.Mod,get_current_line_type lexbuf) }
+  | '~' { start_line true; TTilde (get_current_line_type lexbuf) }
+
+  | "++" { pass_zero();
+           if !current_line_started
+           then
+             (start_line true; TInc (get_current_line_type lexbuf))
+           else (patch_or_match PATCH;
+                 add_current_line_type D.PLUSPLUS; token lexbuf) }
   | "--" { start_line true; TDec (get_current_line_type lexbuf) }
-
-  | "=" { start_line true; TEq (get_current_line_type lexbuf) }
-
+
+  | "=" { start_line true; TEq (get_current_line_type lexbuf) }
+
   | "-=" { start_line true; mkassign Ast.Minus lexbuf }
   | "+=" { start_line true; mkassign Ast.Plus lexbuf }
-
+
   | "*=" { start_line true; mkassign Ast.Mul lexbuf }
   | "/=" { start_line true; mkassign Ast.Div lexbuf }
   | "%=" { start_line true; mkassign Ast.Mod lexbuf }
-
+
   | "&=" { start_line true; mkassign Ast.And lexbuf }
   | "|=" { start_line true; mkassign Ast.Or lexbuf }
   | "^=" { start_line true; mkassign Ast.Xor lexbuf }
-
+
   | "<<=" { start_line true; mkassign Ast.DecLeft lexbuf }
   | ">>=" { start_line true; mkassign Ast.DecRight lexbuf }
   | ":" { start_line true; TDotDot (get_current_line_type lexbuf) }
-
-  | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
-  | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
+
+  | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
+  | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
   | ">=" { start_line true; TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
   | "<=" { start_line true; TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
   | "<" { start_line true;
-          TLogOp(Ast.Inf,get_current_line_type lexbuf) }
+          TLogOp(Ast.Inf,get_current_line_type lexbuf) }
   | ">" { start_line true; TLogOp(Ast.Sup,get_current_line_type lexbuf) }
-
-  | "&&" { start_line true; TAndLog (get_current_line_type lexbuf) }
+
+  | "&&" { start_line true; TAndLog (get_current_line_type lexbuf) }
   | "||" { start_line true; TOrLog (get_current_line_type lexbuf) }
-
+
   | ">>" { start_line true; TShOp(Ast.DecRight,get_current_line_type lexbuf) }
   | "<<" { start_line true; TShOp(Ast.DecLeft,get_current_line_type lexbuf) }
-
+
   | "&" { start_line true; TAnd (get_current_line_type lexbuf) }
   | "^" { start_line true; TXor(get_current_line_type lexbuf) }

-  | ( ("#" [' ' '\t']* "define" [' ' '\t']+))
-    ( (letter (letter |digit)*) as ident)
+  | "##" { start_line true; TCppConcatOp }
+  | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
+    ( (letter (letter |digit)*) as ident)
       { start_line true;
         let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
           get_current_line_type lexbuf in
-        let off = String.length "#define " in
+        let off = String.length def in
         (* -1 in the code below because the ident is not at the line start *)
         TDefine
           (lt,
            check_var ident
-             (arity,line,lline,offset+off,(-1),[],[],Ast0.NoMetaPos)) }
-  | ( ("#" [' ' '\t']* "define" [' ' '\t']+))
-    ( (letter (letter | digit)*) as ident)
+             (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) }
+  | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
+    ( (letter (letter | digit)*) as ident)
     '('
       { start_line true;
        let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
          get_current_line_type lexbuf in
-       let off = String.length "#define " in
+       let off = String.length def in
        TDefineParam
         (lt,
          check_var ident
           (* why pos here but not above? *)
-          (arity,line,lline,offset+off,(-1),strbef,straft,pos),
-         offset + off + (String.length ident)) }
+          (arity,line,lline,offset+off,col+off,strbef,straft,pos),
+         offset + off + (String.length ident),
+         col + off + (String.length ident)) }
   | "#" [' ' '\t']* "include" [' ' '\t']* '"' [^ '"']+ '"'
       { TIncludeL
           (let str = tok lexbuf in
@@ -619,7 +660,13 @@ rule token = parse
   | "#" [' ' '\t']* "endif" [^'\n']*
   | "#" [' ' '\t']* "error" [^'\n']*
       { start_line true; check_plus_linetype (tok lexbuf);
-        TPragma (tok lexbuf) }
+        TPragma (tok lexbuf, get_current_line_type lexbuf) }
+  | "/*"
+      { start_line true; check_plus_linetype (tok lexbuf);
+        (* second argument to TPragma is not quite right, because
+           it represents only the first token of the comemnt, but that
+           should be good enough *)
+        TPragma ("/*"^(comment lexbuf), get_current_line_type lexbuf) }
   | "---" [^'\n']*
       { (if !current_line_started
         then lexerr "--- must be at the beginning of the line" "");
@@ -638,7 +685,7 @@ rule token = parse
              (get_current_line_type lexbuf))) }

   | letter (letter | digit)*
-      { start_line true; id_tokens lexbuf }
+      { start_line true; id_tokens lexbuf }

   | "'" { start_line true;
           TChar(char lexbuf,get_current_line_type lexbuf) }
@@ -646,9 +693,9 @@ rule token = parse
           TString(string lexbuf,(get_current_line_type lexbuf)) }
   | (real as x) { start_line true;
                   TFloat(x,(get_current_line_type lexbuf)) }
-  | ((( decimal | hexa | octal)
-      ( ['u' 'U']
-      | ['l' 'L']
+  | ((( decimal | hexa | octal)
+      ( ['u' 'U']
+      | ['l' 'L']
       | (['l' 'L'] ['u' 'U'])
       | (['u' 'U'] ['l' 'L'])
       | (['u' 'U'] ['l' 'L'] ['l' 'L'])
@@ -677,7 +724,7 @@ and char = parse
         | _ -> lexerr "unrecognised symbol: " (tok lexbuf)
        );
        x
-      }
+      }
   | _ { lexerr "unrecognised symbol: " (tok lexbuf) }

 and string = parse
@@ -685,16 +732,35 @@ and string = parse
   | (_ as x) { Common.string_of_char x ^ string lexbuf }
   | ("\\" (oct | oct oct | oct oct oct)) as x { x ^ string lexbuf }
   | ("\\x" (hex | hex hex)) as x { x ^ string lexbuf }
-  | ("\\" (_ as v)) as x
-       {
+  | ("\\" (_ as v)) as x
+       {
         (match v with
-        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
-        | 'f' -> () | 'a' -> ()
-        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
-        | 'e' -> ()
-        | '\n' -> ()
-        | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
+        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+        | 'f' -> () | 'a' -> ()
+        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+        | 'e' -> ()
+        | '\n' -> ()
+        | '(' -> () | '|' -> () | ')' -> ()
+        | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
        );
        x ^ string lexbuf
       }
   | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
+
+and comment = parse
+  | "*/" { start_line true; tok lexbuf }
+  | ['\n' '\r' '\011' '\012']
+      { reset_line lexbuf; let s = tok lexbuf in s ^ comment lexbuf }
+  | "+" { pass_zero();
+          if !current_line_started
+          then (start_line true; let s = tok lexbuf in s^(comment lexbuf))
+          else comment lexbuf }
+  (* noteopti: *)
+  | [^ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
+  | [ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
+  | _
+      { start_line true; let s = tok lexbuf in
+        Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
+        s ^ comment lexbuf
+      }
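The main behavioural change in the diff above is the treatment of "++": when it is the first thing on a SmPL line it now records the line type D.PLUSPLUS (much as a leading "-", "+" or "*" already selects the line's kind), and only once the line has started is it lexed as the C increment operator TInc. The following is a minimal, self-contained OCaml sketch of that column-zero dispatch; every name in it (line_kind, tok, classify, LineMarker) is an illustrative stand-in, not the real Coccinelle lexer API, which goes through pass_zero, patch_or_match and add_current_line_type instead.

(* Toy model of the column-zero dispatch used for "++" in the diff above.
   All names here are hypothetical; the real lexer records the line type
   and then re-lexes the rest of the line. *)
type line_kind = Context | Plus | PlusPlus | Minus

type tok = TInc | TPlus | TMinus | LineMarker of line_kind

let current_line_started = ref false
let current_line_kind = ref Context

(* Before any real token on the line, "+", "++" and "-" annotate the whole
   SmPL line; once the line has started they are ordinary C operators. *)
let classify lexeme =
  if not !current_line_started then begin
    let k =
      match lexeme with
      | "++" -> PlusPlus
      | "+" -> Plus
      | "-" -> Minus
      | _ -> Context in
    current_line_kind := k;
    LineMarker k
  end else
    match lexeme with
    | "++" -> TInc
    | "+" -> TPlus
    | "-" -> TMinus
    | _ -> failwith "only the marker lexemes are modelled in this sketch"

let () =
  (* Column zero: "++" marks the line, as in the new lexer rule. *)
  current_line_started := false;
  assert (classify "++" = LineMarker PlusPlus);
  (* Mid-line: the same lexeme is the C increment operator. *)
  current_line_started := true;
  assert (classify "++" = TInc)

The re-lexing step that the real rule performs after recording the marker (the recursive token lexbuf call) is deliberately left out here; the sketch only shows how the same two-character lexeme is interpreted differently depending on whether the line has started.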