X-Git-Url: https://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/9f8e26f459677a621822918b7539ae94214621ac..8babbc8f18fe06ec20e19630a1ec09e759c380e1:/parsing_cocci/lexer_cocci.mll diff --git a/parsing_cocci/lexer_cocci.mll b/parsing_cocci/lexer_cocci.mll index 6abc9b1..46fdde8 100644 --- a/parsing_cocci/lexer_cocci.mll +++ b/parsing_cocci/lexer_cocci.mll @@ -1,4 +1,6 @@ (* + * Copyright 2010, INRIA, University of Copenhagen + * Julia Lawall, Rene Rydhof Hansen, Gilles Muller, Nicolas Palix * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix * This file is part of Coccinelle. @@ -50,7 +52,7 @@ let get_current_line_type lexbuf = if !line_start < 0 then 0 else lex_start - !line_start in (*line_start := -1;*) prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS); - (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos) + (c,l,ll,lex_start,preceeding_spaces,[],[],[]) let current_line_started = ref false let col_zero = ref true @@ -116,6 +118,10 @@ let check_arity_context_linetype s = | (D.UNIQUE,_,_) | (D.OPT,_,_) -> () | _ -> lexerr "invalid in a nonempty context: " s +let check_comment s = + if not !current_line_started + then lexerr "+ expected at the beginning of the line" s + let process_include start finish str = (match !current_line_type with (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> @@ -191,7 +197,8 @@ let id_tokens lexbuf = let in_iso = !Data.in_iso in let in_prolog = !Data.in_prolog in match s with - "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier + "metavariable" when in_meta -> check_arity_context_linetype s; TMetavariable + | "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier | "type" when in_meta -> check_arity_context_linetype s; TType | "parameter" when in_meta -> check_arity_context_linetype s; TParameter | "constant" when in_meta -> check_arity_context_linetype s; TConstant @@ -199,6 +206,10 @@ let id_tokens lexbuf = check_arity_context_linetype s; TGenerated | "expression" when in_meta || in_rule_name -> check_arity_context_linetype s; TExpression + | "declaration" when in_meta || in_rule_name -> + check_arity_context_linetype s; TDeclaration + | "field" when in_meta || in_rule_name -> + check_arity_context_linetype s; TField | "initialiser" when in_meta || in_rule_name -> check_arity_context_linetype s; TInitialiser | "initializer" when in_meta || in_rule_name -> @@ -224,7 +235,9 @@ let id_tokens lexbuf = | "words" when in_meta -> check_context_linetype s; TWords | "using" when in_rule_name || in_prolog -> check_context_linetype s; TUsing - | "virtual" when in_prolog -> check_context_linetype s; TVirtual + | "virtual" when in_prolog or in_rule_name or in_meta -> + (* don't want to allow virtual as a rule name *) + check_context_linetype s; TVirtual | "disable" when in_rule_name -> check_context_linetype s; TDisable | "extends" when in_rule_name -> check_context_linetype s; TExtends | "depends" when in_rule_name -> check_context_linetype s; TDepends @@ -245,6 +258,9 @@ let id_tokens lexbuf = | "float" -> Tfloat linetype | "long" -> Tlong linetype | "void" -> Tvoid linetype + | "size_t" -> Tsize_t linetype + | "ssize_t" -> Tssize_t linetype + | "ptrdiff_t" -> Tptrdiff_t linetype (* in_meta is only for the first keyword; drop it now to allow any type name *) | "struct" -> Data.saw_struct := true; Tstruct linetype @@ -278,13 +294,16 @@ let id_tokens lexbuf = | "sizeof" -> TSizeof linetype - | "Expression" -> TIsoExpression - | "ArgExpression" -> TIsoArgExpression - | "TestExpression" -> TIsoTestExpression - | "Statement" -> TIsoStatement - | "Declaration" -> TIsoDeclaration - | "Type" -> TIsoType - | "TopLevel" -> TIsoTopLevel + | "Expression" when !Data.in_iso -> TIsoExpression + | "ArgExpression" when !Data.in_iso -> TIsoArgExpression + | "TestExpression" when !Data.in_iso -> TIsoTestExpression + | "ToTestExpression" when !Data.in_iso -> TIsoToTestExpression + | "Statement" when !Data.in_iso -> TIsoStatement + | "Declaration" when !Data.in_iso -> TIsoDeclaration + | "Type" when !Data.in_iso -> TIsoType + | "TopLevel" when !Data.in_iso -> TIsoTopLevel + + | "_" when !Data.in_meta -> TUnderscore | s -> check_var s linetype @@ -297,6 +316,7 @@ let init _ = prev_plus := false; line_start := 0; current_line_started := false; + current_line_type := (D.CONTEXT,0,0); col_zero := true; pm := UNKNOWN; Data.in_rule_name := false; @@ -312,13 +332,25 @@ let init _ = Hashtbl.clear iterator_names; Hashtbl.clear declarer_names; let get_name (_,x) = x in + Data.add_meta_meta := + (fun name pure -> + let fn clt = TMeta(name,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_id_meta := (fun name constraints pure -> - let fn clt = TMetaId(name,constraints,pure,clt) in + let fn clt = TMetaId(name,constraints,Ast.NoVal,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); + Data.add_virt_id_meta_found := + (fun name vl -> + let fn clt = TIdent(vl,clt) in + Hashtbl.replace metavariables name fn); + Data.add_virt_id_meta_not_found := + (fun name pure -> + let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast.NoVal,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); Data.add_fresh_id_meta := - (fun name -> - let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in + (fun name seed -> + let fn clt = TMetaId(name,Ast.IdNoConstraint,seed,Ast0.Impure,clt) in Hashtbl.replace metavariables (get_name name) fn); Data.add_type_meta := (fun name pure -> @@ -328,6 +360,10 @@ let init _ = (fun name pure -> let fn clt = TMetaInit(name,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); + Data.add_initlist_meta := + (function name -> function lenname -> function pure -> + let fn clt = TMetaInitList(name,lenname,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_param_meta := (function name -> function pure -> let fn clt = TMetaParam(name,pure,clt) in @@ -360,6 +396,18 @@ let init _ = (function name -> function lenname -> function pure -> let fn clt = TMetaExpList(name,lenname,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); + Data.add_decl_meta := + (function name -> function pure -> + let fn clt = TMetaDecl(name,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); + Data.add_field_meta := + (function name -> function pure -> + let fn clt = TMetaField(name,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); + Data.add_field_list_meta := + (function name -> function lenname -> function pure -> + let fn clt = TMetaFieldList(name,lenname,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_stm_meta := (function name -> function pure -> let fn clt = TMetaStm(name,pure,clt) in @@ -411,6 +459,13 @@ let init _ = List.iter (function (name,fn) -> Hashtbl.add metavariables name fn) (Hashtbl.find all_metavariables parent)) +(* the following is needed to properly tokenize include files. Because an +include file is included after seeing a @, so current_line_started is true. +Current_line_started is not important for parsing the name of a rule, so we +don't have to reset this value to true after parsing an included file. *) +let include_init _ = + current_line_started := false + let drop_spaces s = let len = String.length s in let rec loop n = @@ -446,10 +501,40 @@ let real = pent exp | ((pent? '.' pfract | pent '.' pfract? ) exp?) rule token = parse - | [' ' '\t' ]+ { start_line false; token lexbuf } - | ['\n' '\r' '\011' '\012'] { reset_line lexbuf; token lexbuf } + | [' ' '\t']* ['\n' '\r' '\011' '\012'] + { let cls = !current_line_started in + + if not cls + then + begin + match !current_line_type with + (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> + let info = get_current_line_type lexbuf in + reset_line lexbuf; + TPragma (Ast.Noindent "", info) + | _ -> reset_line lexbuf; token lexbuf + end + else (reset_line lexbuf; token lexbuf) } + + | [' ' '\t' ]+ { start_line false; token lexbuf } + + | [' ' '\t' ]* (("//" [^ '\n']*) as after) { + match !current_line_type with + (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> + let str = + if !current_line_started + then (tok lexbuf) + else after in + start_line true; + TPragma (Ast.Indent str, get_current_line_type lexbuf) + | _ -> start_line false; token lexbuf } - | "//" [^ '\n']* { start_line false; token lexbuf } + | "__attribute__" [' ' '\t']* "((" _* "))" + { match !current_line_type with + (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> + start_line true; + TPragma (Ast.Space (tok lexbuf), get_current_line_type lexbuf) + | _ -> failwith "attributes only allowedin + code" } | "@@" { start_line true; TArobArob } | "@" { pass_zero(); @@ -479,9 +564,9 @@ rule token = parse TOEllipsis (get_current_line_type lexbuf) } | "...>" { start_line true; check_context_linetype (tok lexbuf); TCEllipsis (get_current_line_type lexbuf) } - | "<+..." { start_line true; check_context_linetype (tok lexbuf); + | "<+..." { start_line true; check_minus_context_linetype (tok lexbuf); TPOEllipsis (get_current_line_type lexbuf) } - | "...+>" { start_line true; check_context_linetype (tok lexbuf); + | "...+>" { start_line true; check_minus_context_linetype (tok lexbuf); TPCEllipsis (get_current_line_type lexbuf) } (* | "=" { start_line true; TLogOp(Ast.SupEq,get_current_line_type lexbuf) } | "<=" { start_line true; - TLogOp(Ast.InfEq,get_current_line_type lexbuf) } + if !Data.in_meta + then TSub(get_current_line_type lexbuf) + else TLogOp(Ast.InfEq,get_current_line_type lexbuf) } | "<" { start_line true; TLogOp(Ast.Inf,get_current_line_type lexbuf) } | ">" { start_line true; @@ -605,14 +692,25 @@ rule token = parse | "||" { start_line true; TOrLog (get_current_line_type lexbuf) } | ">>" { start_line true; - TShOp(Ast.DecRight,get_current_line_type lexbuf) } + TShROp(Ast.DecRight,get_current_line_type lexbuf) } | "<<" { start_line true; - TShOp(Ast.DecLeft,get_current_line_type lexbuf) } + TShLOp(Ast.DecLeft,get_current_line_type lexbuf) } | "&" { start_line true; TAnd (get_current_line_type lexbuf) } | "^" { start_line true; TXor(get_current_line_type lexbuf) } | "##" { start_line true; TCppConcatOp } + | (( ("#" [' ' '\t']* "undef" [' ' '\t']+)) as def) + ( (letter (letter |digit)*) as ident) + { start_line true; + let (arity,line,lline,offset,col,strbef,straft,pos) as lt = + get_current_line_type lexbuf in + let off = String.length def in + (* -1 in the code below because the ident is not at the line start *) + TUndef + (lt, + check_var ident + (arity,line,lline,offset+off,col+off,[],[],[])) } | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def) ( (letter (letter |digit)*) as ident) { start_line true; @@ -623,7 +721,7 @@ rule token = parse TDefine (lt, check_var ident - (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) } + (arity,line,lline,offset+off,col+off,[],[],[])) } | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def) ( (letter (letter | digit)*) as ident) '(' @@ -660,13 +758,18 @@ rule token = parse | "#" [' ' '\t']* "endif" [^'\n']* | "#" [' ' '\t']* "error" [^'\n']* { start_line true; check_plus_linetype (tok lexbuf); - TPragma (tok lexbuf, get_current_line_type lexbuf) } + TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) } | "/*" - { start_line true; check_plus_linetype (tok lexbuf); + { + match !current_line_type with + (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> + start_line true; (* second argument to TPragma is not quite right, because - it represents only the first token of the comemnt, but that + it represents only the first token of the comment, but that should be good enough *) - TPragma ("/*"^(comment lexbuf), get_current_line_type lexbuf) } + TPragma (Ast.Indent("/*"^(comment check_comment lexbuf)), + get_current_line_type lexbuf) + | _ -> let _ = comment (fun _ -> ()) lexbuf in token lexbuf } | "---" [^'\n']* { (if !current_line_started then lexerr "--- must be at the beginning of the line" ""); @@ -747,20 +850,28 @@ and string = parse } | _ { lexerr "unrecognised symbol: " (tok lexbuf) } -and comment = parse - | "*/" { start_line true; tok lexbuf } +and comment check_comment = parse + | "*/" { let s = tok lexbuf in check_comment s; start_line true; s } | ['\n' '\r' '\011' '\012'] - { reset_line lexbuf; let s = tok lexbuf in s ^ comment lexbuf } + { let s = tok lexbuf in + (* even blank line should have a + *) + check_comment s; + reset_line lexbuf; s ^ comment check_comment lexbuf } | "+" { pass_zero(); if !current_line_started - then (start_line true; let s = tok lexbuf in s^(comment lexbuf)) - else comment lexbuf } + then (start_line true; + let s = tok lexbuf in s^(comment check_comment lexbuf)) + else (start_line true; comment check_comment lexbuf) } (* noteopti: *) - | [^ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf } - | [ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf } + | [^ '*'] + { let s = tok lexbuf in + check_comment s; start_line true; s ^ comment check_comment lexbuf } + | [ '*'] + { let s = tok lexbuf in + check_comment s; start_line true; s ^ comment check_comment lexbuf } | _ { start_line true; let s = tok lexbuf in Common.pr2 ("LEXER: unrecognised symbol in comment:"^s); - s ^ comment lexbuf + s ^ comment check_comment lexbuf }