X-Git-Url: https://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/951c78018cc91c58699aef95c0ccc20f34065162..17ba07880e1838028b4516ba7a2db2147b3aa1c9:/parsing_cocci/lexer_cocci.mll

diff --git a/parsing_cocci/lexer_cocci.mll b/parsing_cocci/lexer_cocci.mll
index 14a62d9..d94c7d8 100644
--- a/parsing_cocci/lexer_cocci.mll
+++ b/parsing_cocci/lexer_cocci.mll
@@ -1,4 +1,8 @@
 (*
+ * Copyright 2012, INRIA
+ * Julia Lawall, Gilles Muller
+ * Copyright 2010-2011, INRIA, University of Copenhagen
+ * Julia Lawall, Rene Rydhof Hansen, Gilles Muller, Nicolas Palix
  * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
  * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
  * This file is part of Coccinelle.
@@ -50,10 +54,13 @@ let get_current_line_type lexbuf =
     if !line_start < 0 then 0 else lex_start - !line_start in
   (*line_start := -1;*)
   prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
-  (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos)
+  (c,l,ll,lex_start,preceeding_spaces,[],[],[])
 let current_line_started = ref false
 let col_zero = ref true
 
+let contextify (c,l,ll,lex_start,preceeding_spaces,bef,aft,pos) =
+  (D.CONTEXT,l,ll,lex_start,preceeding_spaces,bef,aft,pos)
+
 let reset_line lexbuf =
   line := !line + 1;
   current_line_type := (D.CONTEXT,!line,!logical_line);
@@ -116,6 +123,10 @@ let check_arity_context_linetype s =
   | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
   | _ -> lexerr "invalid in a nonempty context: " s
 
+let check_comment s =
+  if not !current_line_started
+  then lexerr "+ expected at the beginning of the line" s
+
 let process_include start finish str =
   (match !current_line_type with
     (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
@@ -133,15 +144,20 @@ let pm = ref UNKNOWN
 
 let patch_or_match = function
     PATCH ->
-      (match !pm with
-        MATCH -> lexerr "- or + not allowed in the first column for a match" ""
-      | PATCH -> ()
-      | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
+      if not !D.ignore_patch_or_match
+      then
+        (match !pm with
+          MATCH ->
+            lexerr "- or + not allowed in the first column for a match" ""
+        | PATCH -> ()
+        | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
   | MATCH ->
-      (match !pm with
-        PATCH -> lexerr "* not allowed in the first column for a patch" ""
-      | MATCH -> ()
-      | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
+      if not !D.ignore_patch_or_match
+      then
+        (match !pm with
+          PATCH -> lexerr "* not allowed in the first column for a patch" ""
+        | MATCH -> ()
+        | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
   | _ -> failwith "unexpected argument"
 
 (* ---------------------------------------------------------------------- *)
@@ -158,6 +174,8 @@ let declarer_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
 let iterator_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
 
+let symbol_names = (Hashtbl.create(15) : (string, D.clt -> token) Hashtbl.t)
+
 let rule_names = (Hashtbl.create(100) : (string, unit) Hashtbl.t)
 
 let check_var s linetype =
@@ -173,7 +191,10 @@ let check_var s linetype =
        (try (Hashtbl.find declarer_names s) linetype
        with Not_found ->
          (try (Hashtbl.find iterator_names s) linetype
-         with Not_found -> TIdent (s,linetype)))) in
+         with Not_found ->
+           (try (Hashtbl.find symbol_names s) linetype
+           with Not_found ->
+             TIdent (s,linetype))))) in
   if !Data.in_meta or !Data.in_rule_name
   then (try Hashtbl.find rule_names s; TRuleName s with Not_found -> fail())
   else fail()
@@ -186,7 +207,8 @@ let id_tokens lexbuf =
   let in_iso = !Data.in_iso in
   let in_prolog = !Data.in_prolog in
   match s with
with - "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier + "metavariable" when in_meta -> check_arity_context_linetype s; TMetavariable + | "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier | "type" when in_meta -> check_arity_context_linetype s; TType | "parameter" when in_meta -> check_arity_context_linetype s; TParameter | "constant" when in_meta -> check_arity_context_linetype s; TConstant @@ -194,6 +216,10 @@ let id_tokens lexbuf = check_arity_context_linetype s; TGenerated | "expression" when in_meta || in_rule_name -> check_arity_context_linetype s; TExpression + | "declaration" when in_meta || in_rule_name -> + check_arity_context_linetype s; TDeclaration + | "field" when in_meta || in_rule_name -> + check_arity_context_linetype s; TField | "initialiser" when in_meta || in_rule_name -> check_arity_context_linetype s; TInitialiser | "initializer" when in_meta || in_rule_name -> @@ -217,9 +243,12 @@ let id_tokens lexbuf = check_arity_context_linetype s; TContext | "error" when in_meta -> check_arity_context_linetype s; TError | "words" when in_meta -> check_context_linetype s; TWords + | "symbol" when in_meta -> check_arity_context_linetype s; TSymbol | "using" when in_rule_name || in_prolog -> check_context_linetype s; TUsing - | "virtual" when in_prolog -> check_context_linetype s; TVirtual + | "virtual" when in_prolog or in_rule_name or in_meta -> + (* don't want to allow virtual as a rule name *) + check_context_linetype s; TVirtual | "disable" when in_rule_name -> check_context_linetype s; TDisable | "extends" when in_rule_name -> check_context_linetype s; TExtends | "depends" when in_rule_name -> check_context_linetype s; TDepends @@ -240,6 +269,9 @@ let id_tokens lexbuf = | "float" -> Tfloat linetype | "long" -> Tlong linetype | "void" -> Tvoid linetype + | "size_t" -> Tsize_t linetype + | "ssize_t" -> Tssize_t linetype + | "ptrdiff_t" -> Tptrdiff_t linetype (* in_meta is only for the first keyword; drop it now to allow any type name *) | "struct" -> Data.saw_struct := true; Tstruct linetype @@ -273,13 +305,16 @@ let id_tokens lexbuf = | "sizeof" -> TSizeof linetype - | "Expression" -> TIsoExpression - | "ArgExpression" -> TIsoArgExpression - | "TestExpression" -> TIsoTestExpression - | "Statement" -> TIsoStatement - | "Declaration" -> TIsoDeclaration - | "Type" -> TIsoType - | "TopLevel" -> TIsoTopLevel + | "Expression" when !Data.in_iso -> TIsoExpression + | "ArgExpression" when !Data.in_iso -> TIsoArgExpression + | "TestExpression" when !Data.in_iso -> TIsoTestExpression + | "ToTestExpression" when !Data.in_iso -> TIsoToTestExpression + | "Statement" when !Data.in_iso -> TIsoStatement + | "Declaration" when !Data.in_iso -> TIsoDeclaration + | "Type" when !Data.in_iso -> TIsoType + | "TopLevel" when !Data.in_iso -> TIsoTopLevel + + | "_" when !Data.in_meta -> TUnderscore | s -> check_var s linetype @@ -292,6 +327,7 @@ let init _ = prev_plus := false; line_start := 0; current_line_started := false; + current_line_type := (D.CONTEXT,0,0); col_zero := true; pm := UNKNOWN; Data.in_rule_name := false; @@ -306,14 +342,27 @@ let init _ = Hashtbl.clear rule_names; Hashtbl.clear iterator_names; Hashtbl.clear declarer_names; + Hashtbl.clear symbol_names; let get_name (_,x) = x in + Data.add_meta_meta := + (fun name pure -> + let fn clt = TMeta(name,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_id_meta := (fun name constraints pure -> - let fn clt = TMetaId(name,constraints,pure,clt) in + let fn clt = 
+      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_virt_id_meta_found :=
+    (fun name vl ->
+      let fn clt = TIdent(vl,clt) in
+      Hashtbl.replace metavariables name fn);
+  Data.add_virt_id_meta_not_found :=
+    (fun name pure ->
+      let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast.NoVal,pure,clt) in
       Hashtbl.replace metavariables (get_name name) fn);
   Data.add_fresh_id_meta :=
-    (fun name ->
-      let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in
+    (fun name seed ->
+      let fn clt = TMetaId(name,Ast.IdNoConstraint,seed,Ast0.Impure,clt) in
       Hashtbl.replace metavariables (get_name name) fn);
   Data.add_type_meta :=
     (fun name pure ->
      let fn clt = TMetaType(name,pure,clt) in
      Hashtbl.replace metavariables (get_name name) fn);
@@ -323,6 +372,10 @@ let init _ =
     (fun name pure ->
      let fn clt = TMetaInit(name,pure,clt) in
      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_initlist_meta :=
+    (function name -> function lenname -> function pure ->
+      let fn clt = TMetaInitList(name,lenname,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
   Data.add_param_meta :=
     (function name -> function pure ->
      let fn clt = TMetaParam(name,pure,clt) in
      Hashtbl.replace metavariables (get_name name) fn);
@@ -355,6 +408,18 @@ let init _ =
     (function name -> function lenname -> function pure ->
      let fn clt = TMetaExpList(name,lenname,pure,clt) in
      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_decl_meta :=
+    (function name -> function pure ->
+      let fn clt = TMetaDecl(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_field_meta :=
+    (function name -> function pure ->
+      let fn clt = TMetaField(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_field_list_meta :=
+    (function name -> function lenname -> function pure ->
+      let fn clt = TMetaFieldList(name,lenname,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
   Data.add_stm_meta :=
     (function name -> function pure ->
      let fn clt = TMetaStm(name,pure,clt) in
@@ -400,12 +465,23 @@ let init _ =
     (function name ->
      let fn clt = TIteratorId(name,clt) in
      Hashtbl.replace iterator_names name fn);
+  Data.add_symbol_meta :=
+    (function name ->
+      let fn clt = TSymId (name,clt) in
+      Hashtbl.replace symbol_names name fn);
   Data.init_rule := (function _ -> Hashtbl.clear metavariables);
   Data.install_bindings :=
     (function parent ->
      List.iter (function (name,fn) -> Hashtbl.add metavariables name fn)
        (Hashtbl.find all_metavariables parent))
 
+(* the following is needed to properly tokenize include files. Because an
+include file is included after seeing a @, so current_line_started is true.
+Current_line_started is not important for parsing the name of a rule, so we
+don't have to reset this value to true after parsing an included file. *)
+let include_init _ =
+  current_line_started := false
+
 let drop_spaces s =
   let len = String.length s in
   let rec loop n =
@@ -441,19 +517,50 @@ let real = pent exp | ((pent? '.' pfract | pent '.' pfract? ) exp?)
 
 rule token = parse
-  | [' ' '\t' ]+ { start_line false; token lexbuf }
-  | ['\n' '\r' '\011' '\012'] { reset_line lexbuf; token lexbuf }
+  | [' ' '\t']* ['\n' '\r' '\011' '\012']
+    { let cls = !current_line_started in
+
+      if not cls
+      then
+        begin
+          match !current_line_type with
+            (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+              let info = get_current_line_type lexbuf in
+              reset_line lexbuf;
+              TPragma (Ast.Noindent "", info)
+          | _ -> reset_line lexbuf; token lexbuf
+        end
+      else (reset_line lexbuf; token lexbuf) }
+
+  | [' ' '\t' ]+ { start_line false; token lexbuf }
+
+  | [' ' '\t' ]* (("//" [^ '\n']*) as after) {
+    match !current_line_type with
+      (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+        let str =
+          if !current_line_started
+          then (tok lexbuf)
+          else after in
+        start_line true;
+        TPragma (Ast.Indent str, get_current_line_type lexbuf)
+    | _ -> start_line false; token lexbuf }
 
-  | "//" [^ '\n']* { start_line false; token lexbuf }
+  | "__attribute__" [' ' '\t']* "((" _* "))"
+    { match !current_line_type with
+        (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+          start_line true;
+          TPragma (Ast.Space (tok lexbuf), get_current_line_type lexbuf)
+      | _ -> failwith "attributes only allowedin + code" }
 
   | "@@" { start_line true; TArobArob }
   | "@"  { pass_zero();
           if !Data.in_rule_name or not !current_line_started
          then (start_line true; TArob)
-         else (check_minus_context_linetype "@"; TPArob) }
+         else (check_minus_context_linetype "@";
+               TPArob (get_current_line_type lexbuf)) }
 
-  | "~=" { start_line true; TTildeEq (get_current_line_type lexbuf) }
-  | "!~=" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
+  | "=~" { start_line true; TTildeEq (get_current_line_type lexbuf) }
+  | "!~" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
   | "WHEN" | "when"
      { start_line true; check_minus_context_linetype (tok lexbuf);
        TWhen (get_current_line_type lexbuf) }
@@ -474,9 +581,9 @@ rule token = parse
             TOEllipsis (get_current_line_type lexbuf) }
   | "...>" { start_line true; check_context_linetype (tok lexbuf);
             TCEllipsis (get_current_line_type lexbuf) }
-  | "<+..." { start_line true; check_context_linetype (tok lexbuf);
+  | "<+..." { start_line true; check_minus_context_linetype (tok lexbuf);
             TPOEllipsis (get_current_line_type lexbuf) }
-  | "...+>" { start_line true; check_context_linetype (tok lexbuf);
+  | "...+>" { start_line true; check_minus_context_linetype (tok lexbuf);
             TPCEllipsis (get_current_line_type lexbuf) }
 (*
   | ">=" { start_line true; TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
   | "<=" { start_line true;
-           TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
+           if !Data.in_meta
+           then TSub(get_current_line_type lexbuf)
+           else TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
   | "<" { start_line true; TLogOp(Ast.Inf,get_current_line_type lexbuf) }
   | ">" { start_line true;
@@ -600,14 +712,25 @@ rule token = parse
   | "||" { start_line true; TOrLog (get_current_line_type lexbuf) }
   | ">>" { start_line true;
-           TShOp(Ast.DecRight,get_current_line_type lexbuf) }
+           TShROp(Ast.DecRight,get_current_line_type lexbuf) }
   | "<<" { start_line true;
-           TShOp(Ast.DecLeft,get_current_line_type lexbuf) }
+           TShLOp(Ast.DecLeft,get_current_line_type lexbuf) }
   | "&" { start_line true; TAnd (get_current_line_type lexbuf) }
   | "^" { start_line true; TXor(get_current_line_type lexbuf) }
   | "##" { start_line true; TCppConcatOp }
+  | (( ("#" [' ' '\t']* "undef" [' ' '\t']+)) as def)
+    ( (letter (letter |digit)*) as ident)
+    { start_line true;
+      let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
+        get_current_line_type lexbuf in
+      let off = String.length def in
+      (* -1 in the code below because the ident is not at the line start *)
+      TUndef
+        (lt,
+         check_var ident
+           (arity,line,lline,offset+off,col+off,[],[],[])) }
   | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
     ( (letter (letter |digit)*) as ident)
     { start_line true;
@@ -618,7 +741,7 @@ rule token = parse
       TDefine
        (lt,
         check_var ident
-          (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) }
+          (arity,line,lline,offset+off,col+off,[],[],[])) }
   | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
     ( (letter (letter | digit)*) as ident)
     '('
@@ -654,14 +777,21 @@ rule token = parse
   | "#" [' ' '\t']* "elif" [^'\n']*
   | "#" [' ' '\t']* "endif" [^'\n']*
   | "#" [' ' '\t']* "error" [^'\n']*
+  | "#" [' ' '\t']* "pragma" [^'\n']*
+  | "#" [' ' '\t']* "line" [^'\n']*
     { start_line true; check_plus_linetype (tok lexbuf);
-      TPragma (tok lexbuf, get_current_line_type lexbuf) }
+      TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) }
   | "/*"
-      { start_line true; check_plus_linetype (tok lexbuf);
+      {
+       match !current_line_type with
+         (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+           start_line true;
        (* second argument to TPragma is not quite right, because
-          it represents only the first token of the comemnt, but that
+          it represents only the first token of the comment, but that
           should be good enough *)
-       TPragma ("/*"^(comment lexbuf), get_current_line_type lexbuf) }
+           TPragma (Ast.Indent("/*"^(comment check_comment lexbuf)),
+                    get_current_line_type lexbuf)
+       | _ -> let _ = comment (fun _ -> ()) lexbuf in token lexbuf }
   | "---" [^'\n']*
     { (if !current_line_started
      then lexerr "--- must be at the beginning of the line" "");
@@ -707,20 +837,55 @@ rule token = parse
 
 and char = parse
-  | (_ as x) "'" { String.make 1 x }
-  | (("\\" (oct | oct oct | oct oct oct)) as x "'") { x }
-  | (("\\x" (hex | hex hex)) as x "'") { x }
-  | (("\\" (_ as v)) as x "'")
-      { (match v with
-        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> ()
-        | 'r' -> () | 'f' -> () | 'a' -> ()
-        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
-        | 'e' -> ()
-        | _ -> lexerr "unrecognised symbol: " (tok lexbuf)
-        );
-        x
+  | (_ as x) { String.make 1 x ^ restchars lexbuf }
+  (* todo?: as for octal, do exception beyond radix exception ? *)
+  | (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf }
+  (* this rule must be after the one with octal, lex try first longest
+   * and when \7 we want an octal, not an exn.
+   *)
+  | (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf }
+  | (("\\" (_ as v)) as x )
+      {
+        (match v with (* Machine specific ? *)
+        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+        | 'f' -> () | 'a' -> ()
+        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+        | 'e' -> () (* linuxext: ? *)
+        | _ ->
+            Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+        );
+        x ^ restchars lexbuf
        }
-  | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
+  | _
+      { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+        tok lexbuf ^ restchars lexbuf
+      }
+
+and restchars = parse
+  | "'" { "" }
+  | (_ as x) { String.make 1 x ^ restchars lexbuf }
+  (* todo?: as for octal, do exception beyond radix exception ? *)
+  | (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf }
+  (* this rule must be after the one with octal, lex try first longest
+   * and when \7 we want an octal, not an exn.
+   *)
+  | (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf }
+  | (("\\" (_ as v)) as x )
+      {
+        (match v with (* Machine specific ? *)
+        | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+        | 'f' -> () | 'a' -> ()
+        | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+        | 'e' -> () (* linuxext: ? *)
+        | _ ->
+            Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+        );
+        x ^ restchars lexbuf
+      }
+  | _
+      { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+        tok lexbuf ^ restchars lexbuf
+      }
 
 and string = parse
   | '"' { "" }
@@ -730,31 +895,40 @@ and string = parse
   | ("\\" (_ as v)) as x
       {
        (match v with
-       | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
-       | 'f' -> () | 'a' -> ()
-       | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
-       | 'e' -> ()
-       | '\n' -> ()
-       | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
+       | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+       | 'f' -> () | 'a' -> ()
+       | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+       | 'e' -> ()
+       | '\n' -> ()
+       | '(' -> () | '|' -> () | ')' -> ()
+       | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
        );
       x ^ string lexbuf }
  | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
 
-and comment = parse
-  | "*/" { start_line true; tok lexbuf }
+and comment check_comment = parse
+  | "*/" { let s = tok lexbuf in check_comment s; start_line true; s }
   | ['\n' '\r' '\011' '\012']
-      { reset_line lexbuf; let s = tok lexbuf in s ^ comment lexbuf }
+      { let s = tok lexbuf in
+        (* even blank line should have a + *)
+        check_comment s;
+        reset_line lexbuf; s ^ comment check_comment lexbuf }
   | "+" { pass_zero();
          if !current_line_started
-         then (start_line true; let s = tok lexbuf in s^(comment lexbuf))
-         else comment lexbuf }
+         then (start_line true;
+               let s = tok lexbuf in s^(comment check_comment lexbuf))
+         else (start_line true; comment check_comment lexbuf) }
   (* noteopti: *)
-  | [^ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
-  | [ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
+  | [^ '*']
+      { let s = tok lexbuf in
+        check_comment s; start_line true; s ^ comment check_comment lexbuf }
+  | [ '*']
+      { let s = tok lexbuf in
+        check_comment s; start_line true; s ^ comment check_comment lexbuf }
   | _
      { start_line true; let s = tok lexbuf in
       Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
       s ^ comment check_comment lexbuf
     }