X-Git-Url: http://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/34e491640531bd81a0e2238fd599e1aafe53613e..9f8e26f459677a621822918b7539ae94214621ac:/parsing_cocci/lexer_cocci.mll

diff --git a/parsing_cocci/lexer_cocci.mll b/parsing_cocci/lexer_cocci.mll
index c066d1d..6abc9b1 100644
--- a/parsing_cocci/lexer_cocci.mll
+++ b/parsing_cocci/lexer_cocci.mll
@@ -1,23 +1,23 @@
 (*
-* Copyright 2005-2008, Ecole des Mines de Nantes, University of Copenhagen
-* Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller
-* This file is part of Coccinelle.
-*
-* Coccinelle is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, according to version 2 of the License.
-*
-* Coccinelle is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
-*
-* The authors reserve the right to distribute this or future versions of
-* Coccinelle under other licenses.
-*)
+ * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
+ * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
+ * This file is part of Coccinelle.
+ *
+ * Coccinelle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, according to version 2 of the License.
+ *
+ * Coccinelle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The authors reserve the right to distribute this or future versions of
+ * Coccinelle under other licenses.
+ *)

 {

@@ -48,8 +48,8 @@ let get_current_line_type lexbuf =
   let lex_start = Lexing.lexeme_start lexbuf in
   let preceeding_spaces =
     if !line_start < 0 then 0 else lex_start - !line_start in
-  line_start := -1;
-  prev_plus := (c = D.PLUS);
+  (*line_start := -1;*)
+  prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
   (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos)
 let current_line_started = ref false
 let col_zero = ref true
@@ -87,6 +87,8 @@ let add_current_line_type x =
       current_line_type := (D.OPTMINUS,ln,lln)
   | (D.PLUS,(D.CONTEXT,ln,lln)) ->
       current_line_type := (D.PLUS,ln,lln)
+  | (D.PLUSPLUS,(D.CONTEXT,ln,lln)) ->
+      current_line_type := (D.PLUSPLUS,ln,lln)
   | (D.UNIQUE,(D.CONTEXT,ln,lln)) ->
       current_line_type := (D.UNIQUE,ln,lln)
   | (D.OPT,(D.CONTEXT,ln,lln)) ->
@@ -95,7 +97,7 @@ let add_current_line_type x =

 let check_minus_context_linetype s =
   match !current_line_type with
-    (D.PLUS,_,_) -> lexerr "invalid in a + context: " s
+    (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> lexerr "invalid in a + context: " s
   | _ -> ()

 let check_context_linetype s =
@@ -105,17 +107,18 @@ let check_context_linetype s =

 let check_plus_linetype s =
   match !current_line_type with
-    (D.PLUS,_,_) -> ()
+    (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> ()
   | _ -> lexerr "invalid in a non + context: " s

 let check_arity_context_linetype s =
   match !current_line_type with
-    (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
+    (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.PLUSPLUS,_,_)
+  | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
   | _ -> lexerr "invalid in a nonempty context: " s

 let process_include start finish str =
   (match !current_line_type with
-    (D.PLUS,_,_) ->
+    (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
      (try
        let _ = Str.search_forward (Str.regexp "\\.\\.\\.") str start in
        lexerr "... not allowed in + include" ""
@@ -130,15 +133,20 @@ let pm = ref UNKNOWN

 let patch_or_match = function
     PATCH ->
-      (match !pm with
-        MATCH -> lexerr "- or + not allowed in the first column for a match" ""
-      | PATCH -> ()
-      | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
+      if not !D.ignore_patch_or_match
+      then
+        (match !pm with
+          MATCH ->
+            lexerr "- or + not allowed in the first column for a match" ""
+        | PATCH -> ()
+        | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
   | MATCH ->
-      (match !pm with
-        PATCH -> lexerr "* not allowed in the first column for a patch" ""
-      | MATCH -> ()
-      | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
+      if not !D.ignore_patch_or_match
+      then
+        (match !pm with
+          PATCH -> lexerr "* not allowed in the first column for a patch" ""
+        | MATCH -> ()
+        | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
   | _ -> failwith "unexpected argument"

 (* ---------------------------------------------------------------------- *)
@@ -168,7 +176,7 @@ let check_var s linetype =
      (try (Hashtbl.find type_names s) linetype
      with Not_found ->
        (try (Hashtbl.find declarer_names s) linetype
-       with Not_found ->
+       with Not_found ->
          (try (Hashtbl.find iterator_names s) linetype
          with Not_found -> TIdent (s,linetype)))) in
   if !Data.in_meta or !Data.in_rule_name
@@ -179,7 +187,7 @@ let id_tokens lexbuf =
   let s = tok lexbuf in
   let linetype = get_current_line_type lexbuf in
   let in_rule_name = !Data.in_rule_name in
-  let in_meta = !Data.in_meta in
+  let in_meta = !Data.in_meta && not !Data.saw_struct in
   let in_iso = !Data.in_iso in
   let in_prolog = !Data.in_prolog in
   match s with
@@ -187,8 +195,14 @@ let id_tokens lexbuf =
   | "type" when in_meta -> check_arity_context_linetype s; TType
   | "parameter" when in_meta -> check_arity_context_linetype s; TParameter
   | "constant" when in_meta -> check_arity_context_linetype s; TConstant
+  | "generated" when in_rule_name && not (!Flag.make_hrule = None) ->
+      check_arity_context_linetype s; TGenerated
   | "expression" when in_meta || in_rule_name ->
       check_arity_context_linetype s; TExpression
+  | "initialiser" when in_meta || in_rule_name ->
+      check_arity_context_linetype s; TInitialiser
+  | "initializer" when in_meta || in_rule_name ->
+      check_arity_context_linetype s; TInitialiser
   | "idexpression" when in_meta ->
       check_arity_context_linetype s; TIdExpression
   | "statement" when in_meta -> check_arity_context_linetype s; TStatement
@@ -210,15 +224,19 @@ let id_tokens lexbuf =
   | "words" when in_meta -> check_context_linetype s; TWords
   | "using" when in_rule_name || in_prolog ->
       check_context_linetype s; TUsing
+  | "virtual" when in_prolog -> check_context_linetype s; TVirtual
   | "disable" when in_rule_name -> check_context_linetype s; TDisable
   | "extends" when in_rule_name -> check_context_linetype s; TExtends
   | "depends" when in_rule_name -> check_context_linetype s; TDepends
   | "on" when in_rule_name -> check_context_linetype s; TOn
   | "ever" when in_rule_name -> check_context_linetype s; TEver
   | "never" when in_rule_name -> check_context_linetype s; TNever
+  (* exists and forall for when are reparsed in parse_cocci.ml *)
   | "exists" when in_rule_name -> check_context_linetype s; TExists
   | "forall" when in_rule_name -> check_context_linetype s; TForall
-  | "reverse" when in_rule_name -> check_context_linetype s; TReverse
+  | "script" when in_rule_name -> check_context_linetype s; TScript
+  | "initialize" when in_rule_name -> check_context_linetype s; TInitialize
+  | "finalize" when in_rule_name -> check_context_linetype s; TFinalize

   | "char" -> Tchar linetype
   | "short" -> Tshort linetype
@@ -227,11 +245,14 @@ let id_tokens lexbuf =
   | "float" -> Tfloat linetype
   | "long" -> Tlong linetype
   | "void" -> Tvoid linetype
-  | "struct" -> Tstruct linetype
-  | "union" -> Tunion linetype
+  (* in_meta is only for the first keyword; drop it now to allow any type
+     name *)
+  | "struct" -> Data.saw_struct := true; Tstruct linetype
+  | "union" -> Data.saw_struct := true; Tunion linetype
+  | "enum" -> Data.saw_struct := true; Tenum linetype
   | "unsigned" -> Tunsigned linetype
   | "signed" -> Tsigned linetype
-
+
   | "auto" -> Tauto linetype
   | "register" -> Tregister linetype
   | "extern" -> Textern linetype
@@ -281,21 +302,32 @@ let init _ =
   Data.in_rule_name := false;
   Data.in_meta := false;
   Data.in_prolog := false;
+  Data.saw_struct := false;
   Data.inheritable_positions := [];
   Hashtbl.clear all_metavariables;
   Hashtbl.clear Data.all_metadecls;
   Hashtbl.clear metavariables;
   Hashtbl.clear type_names;
   Hashtbl.clear rule_names;
+  Hashtbl.clear iterator_names;
+  Hashtbl.clear declarer_names;
   let get_name (_,x) = x in
   Data.add_id_meta :=
     (fun name constraints pure ->
       let fn clt = TMetaId(name,constraints,pure,clt) in
       Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_fresh_id_meta :=
+    (fun name ->
+      let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
   Data.add_type_meta :=
     (fun name pure ->
      let fn clt = TMetaType(name,pure,clt) in
      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_init_meta :=
+    (fun name pure ->
+      let fn clt = TMetaInit(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
   Data.add_param_meta :=
     (function name -> function pure ->
      let fn clt = TMetaParam(name,pure,clt) in
@@ -404,7 +436,7 @@ let hex = ['0'-'9' 'a'-'f' 'A'-'F']

 let decimal = ('0' | (['1'-'9'] dec*))
 let octal = ['0'] oct+
-let hexa = ("0x" |"0X") hex+
+let hexa = ("0x" |"0X") hex+

 let pent = dec+
 let pfract = dec+
@@ -425,6 +457,8 @@ rule token = parse
           then (start_line true; TArob)
           else (check_minus_context_linetype "@"; TPArob) }

+  | "~=" { start_line true; TTildeEq (get_current_line_type lexbuf) }
+  | "!~=" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
   | "WHEN" | "when"
       { start_line true; check_minus_context_linetype (tok lexbuf);
         TWhen (get_current_line_type lexbuf) }
@@ -516,7 +550,7 @@ rule token = parse
           then TMPtVirg (* works better with tokens_all *)
           else TPtVirg (get_current_line_type lexbuf) }

-
+
   | '*' { pass_zero();
           if !current_line_started
           then
@@ -525,78 +559,85 @@ rule token = parse
             (patch_or_match MATCH;
              add_current_line_type D.MINUS; token lexbuf) }
   | '/' { start_line true;
-          TDmOp (Ast.Div,get_current_line_type lexbuf) }
+          TDmOp (Ast.Div,get_current_line_type lexbuf) }
   | '%' { start_line true;
-          TDmOp (Ast.Mod,get_current_line_type lexbuf) }
-  | '~' { start_line true; TTilde (get_current_line_type lexbuf) }
-
-  | "++" { start_line true; TInc (get_current_line_type lexbuf) }
+          TDmOp (Ast.Mod,get_current_line_type lexbuf) }
+  | '~' { start_line true; TTilde (get_current_line_type lexbuf) }
+
+  | "++" { pass_zero();
+           if !current_line_started
+           then
+             (start_line true; TInc (get_current_line_type lexbuf))
+           else (patch_or_match PATCH;
+                 add_current_line_type D.PLUSPLUS; token lexbuf) }
   | "--" { start_line true; TDec (get_current_line_type lexbuf) }
-
-  | "=" { start_line true; TEq (get_current_line_type lexbuf) }
-
+
+  | "=" { start_line true; TEq (get_current_line_type lexbuf) }
+
   | "-=" { start_line true; mkassign Ast.Minus lexbuf }
   | "+=" { start_line true; mkassign Ast.Plus lexbuf }
-
+
   | "*=" { start_line true; mkassign Ast.Mul lexbuf }
   | "/=" { start_line true; mkassign Ast.Div lexbuf }
   | "%=" { start_line true; mkassign Ast.Mod lexbuf }
-
+
   | "&=" { start_line true; mkassign Ast.And lexbuf }
   | "|=" { start_line true; mkassign Ast.Or lexbuf }
   | "^=" { start_line true; mkassign Ast.Xor lexbuf }
-
+
   | "<<=" { start_line true; mkassign Ast.DecLeft lexbuf }
   | ">>=" { start_line true; mkassign Ast.DecRight lexbuf }
   | ":" { start_line true; TDotDot (get_current_line_type lexbuf) }
-
-  | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
-  | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
+
+  | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
+  | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
   | ">=" { start_line true; TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
   | "<=" { start_line true; TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
   | "<" { start_line true;
-          TLogOp(Ast.Inf,get_current_line_type lexbuf) }
+          TLogOp(Ast.Inf,get_current_line_type lexbuf) }
   | ">" { start_line true; TLogOp(Ast.Sup,get_current_line_type lexbuf) }
-
-  | "&&" { start_line true; TAndLog (get_current_line_type lexbuf) }
+
+  | "&&" { start_line true; TAndLog (get_current_line_type lexbuf) }
   | "||" { start_line true; TOrLog (get_current_line_type lexbuf) }
-
+
   | ">>" { start_line true; TShOp(Ast.DecRight,get_current_line_type lexbuf) }
   | "<<" { start_line true; TShOp(Ast.DecLeft,get_current_line_type lexbuf) }
-
+
   | "&" { start_line true; TAnd (get_current_line_type lexbuf) }
   | "^" { start_line true; TXor(get_current_line_type lexbuf) }

-  | ( ("#" [' ' '\t']* "define" [' ' '\t']+))
-    ( (letter (letter |digit)*) as ident)
+  | "##" { start_line true; TCppConcatOp }
+  | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
+    ( (letter (letter |digit)*) as ident)
       { start_line true;
         let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
           get_current_line_type lexbuf in
-        let off = String.length "#define " in
+        let off = String.length def in
         (* -1 in the code below because the ident is not at the line start *)
         TDefine
           (lt,
            check_var ident
-             (arity,line,lline,offset+off,(-1),[],[],Ast0.NoMetaPos)) }
-  | ( ("#" [' ' '\t']* "define" [' ' '\t']+))
-    ( (letter (letter | digit)*) as ident)
+             (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) }
+  | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
+    ( (letter (letter | digit)*) as ident)
     '('
       { start_line true;
        let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
          get_current_line_type lexbuf in
-       let off = String.length "#define " in
+       let off = String.length def in
        TDefineParam
         (lt,
          check_var ident
           (* why pos here but not above? *)
-          (arity,line,lline,offset+off,(-1),strbef,straft,pos),
-         offset + off + (String.length ident)) }
+          (arity,line,lline,offset+off,col+off,strbef,straft,pos),
+         offset + off + (String.length ident),
+         col + off + (String.length ident)) }
   | "#" [' ' '\t']* "include" [' ' '\t']* '"' [^ '"']+ '"'
       { TIncludeL
           (let str = tok lexbuf in
@@ -619,7 +660,13 @@ rule token = parse
   | "#" [' ' '\t']* "endif" [^'\n']*
   | "#" [' ' '\t']* "error" [^'\n']*
       { start_line true; check_plus_linetype (tok lexbuf);
-        TPragma (tok lexbuf) }
+        TPragma (tok lexbuf, get_current_line_type lexbuf) }
+  | "/*"
+      { start_line true; check_plus_linetype (tok lexbuf);
+        (* second argument to TPragma is not quite right, because
+           it represents only the first token of the comemnt, but that
+           should be good enough *)
+        TPragma ("/*"^(comment lexbuf), get_current_line_type lexbuf) }
   | "---" [^'\n']*
       { (if !current_line_started
         then lexerr "--- must be at the beginning of the line" "");
@@ -638,7 +685,7 @@ rule token = parse
              (get_current_line_type lexbuf))) }

   | letter (letter | digit)*
-      { start_line true; id_tokens lexbuf }
+      { start_line true; id_tokens lexbuf }

   | "'" { start_line true;
           TChar(char lexbuf,get_current_line_type lexbuf) }
@@ -646,9 +693,9 @@ rule token = parse
           TString(string lexbuf,(get_current_line_type lexbuf)) }
   | (real as x) { start_line true;
                   TFloat(x,(get_current_line_type lexbuf)) }
-  | ((( decimal | hexa | octal)
-      ( ['u' 'U']
-      | ['l' 'L']
+  | ((( decimal | hexa | octal)
+      ( ['u' 'U']
+      | ['l' 'L']
       | (['l' 'L'] ['u' 'U'])
       | (['u' 'U'] ['l' 'L'])
       | (['u' 'U'] ['l' 'L'] ['l' 'L'])
@@ -677,7 +724,7 @@ and char = parse
         | _ -> lexerr "unrecognised symbol: " (tok lexbuf)
        );
        x
-      }
+      }
   | _ { lexerr "unrecognised symbol: " (tok lexbuf) }

 and string = parse
@@ -685,16 +732,35 @@ and string = parse
   | (_ as x) { Common.string_of_char x ^ string lexbuf }
   | ("\\" (oct | oct oct | oct oct oct)) as x { x ^ string lexbuf }
   | ("\\x" (hex | hex hex)) as x { x ^ string lexbuf }
-  | ("\\" (_ as v)) as x
-       {
+  | ("\\" (_ as v)) as x
+       {
         (match v with
-        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
-        | 'f' -> () | 'a' -> ()
-        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
-        | 'e' -> ()
-        | '\n' -> ()
-        | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
+        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+        | 'f' -> () | 'a' -> ()
+        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+        | 'e' -> ()
+        | '\n' -> ()
+        | '(' -> () | '|' -> () | ')' -> ()
+        | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
        );
        x ^ string lexbuf
       }
   | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
+
+and comment = parse
+  | "*/" { start_line true; tok lexbuf }
+  | ['\n' '\r' '\011' '\012']
+      { reset_line lexbuf; let s = tok lexbuf in s ^ comment lexbuf }
+  | "+" { pass_zero();
+          if !current_line_started
+          then (start_line true; let s = tok lexbuf in s^(comment lexbuf))
+          else comment lexbuf }
+  (* noteopti: *)
+  | [^ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
+  | [ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
+  | _
+      { start_line true; let s = tok lexbuf in
+        Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
+        s ^ comment lexbuf
+      }
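The main behavioural change in the diff above is the treatment of "++": when it is the first thing on a SmPL line it now records the line type D.PLUSPLUS (much as a leading "-", "+" or "*" already selects the line's kind), and only once the line has started is it lexed as the C increment operator TInc. The following is a minimal, self-contained OCaml sketch of that column-zero dispatch; every name in it (line_kind, tok, classify, LineMarker) is an illustrative stand-in, not the real Coccinelle lexer API, which goes through pass_zero, patch_or_match and add_current_line_type instead.

(* Toy model of the column-zero dispatch used for "++" in the diff above.
   All names here are hypothetical; the real lexer records the line type
   and then re-lexes the rest of the line. *)
type line_kind = Context | Plus | PlusPlus | Minus

type tok = TInc | TPlus | TMinus | LineMarker of line_kind

let current_line_started = ref false
let current_line_kind = ref Context

(* Before any real token on the line, "+", "++" and "-" annotate the whole
   SmPL line; once the line has started they are ordinary C operators. *)
let classify lexeme =
  if not !current_line_started then begin
    let k =
      match lexeme with
      | "++" -> PlusPlus
      | "+" -> Plus
      | "-" -> Minus
      | _ -> Context in
    current_line_kind := k;
    LineMarker k
  end else
    match lexeme with
    | "++" -> TInc
    | "+" -> TPlus
    | "-" -> TMinus
    | _ -> failwith "only the marker lexemes are modelled in this sketch"

let () =
  (* Column zero: "++" marks the line, as in the new lexer rule. *)
  current_line_started := false;
  assert (classify "++" = LineMarker PlusPlus);
  (* Mid-line: the same lexeme is the C increment operator. *)
  current_line_started := true;
  assert (classify "++" = TInc)

The re-lexing step that the real rule performs after recording the marker (the recursive token lexbuf call) is deliberately left out here; the sketch only shows how the same two-character lexeme is interpreted differently depending on whether the line has started.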