X-Git-Url: http://git.hcoop.net/bpt/coccinelle.git/blobdiff_plain/1eddfd5052863e93b723b26a1d1266471882f234..f3c4ece655af618c34b793f1b3286baee09ccbf7:/parsing_cocci/lexer_cocci.mll diff --git a/parsing_cocci/lexer_cocci.mll b/parsing_cocci/lexer_cocci.mll index b76ec4b..ae77d6e 100644 --- a/parsing_cocci/lexer_cocci.mll +++ b/parsing_cocci/lexer_cocci.mll @@ -52,7 +52,7 @@ let get_current_line_type lexbuf = if !line_start < 0 then 0 else lex_start - !line_start in (*line_start := -1;*) prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS); - (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos) + (c,l,ll,lex_start,preceeding_spaces,[],[],[]) let current_line_started = ref false let col_zero = ref true @@ -197,7 +197,8 @@ let id_tokens lexbuf = let in_iso = !Data.in_iso in let in_prolog = !Data.in_prolog in match s with - "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier + "metavariable" when in_meta -> check_arity_context_linetype s; TMetavariable + | "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier | "type" when in_meta -> check_arity_context_linetype s; TType | "parameter" when in_meta -> check_arity_context_linetype s; TParameter | "constant" when in_meta -> check_arity_context_linetype s; TConstant @@ -331,9 +332,13 @@ let init _ = Hashtbl.clear iterator_names; Hashtbl.clear declarer_names; let get_name (_,x) = x in + Data.add_meta_meta := + (fun name pure -> + let fn clt = TMeta(name,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_id_meta := (fun name constraints pure -> - let fn clt = TMetaId(name,constraints,pure,clt) in + let fn clt = TMetaId(name,constraints,Ast.NoVal,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); Data.add_virt_id_meta_found := (fun name vl -> @@ -341,11 +346,11 @@ let init _ = Hashtbl.replace metavariables name fn); Data.add_virt_id_meta_not_found := (fun name pure -> - let fn clt = TMetaId(name,Ast.IdNoConstraint,pure,clt) in + let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast.NoVal,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); Data.add_fresh_id_meta := - (fun name -> - let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in + (fun name seed -> + let fn clt = TMetaId(name,Ast.IdNoConstraint,seed,Ast0.Impure,clt) in Hashtbl.replace metavariables (get_name name) fn); Data.add_type_meta := (fun name pure -> @@ -355,6 +360,10 @@ let init _ = (fun name pure -> let fn clt = TMetaInit(name,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); + Data.add_initlist_meta := + (function name -> function lenname -> function pure -> + let fn clt = TMetaInitList(name,lenname,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_param_meta := (function name -> function pure -> let fn clt = TMetaParam(name,pure,clt) in @@ -395,6 +404,10 @@ let init _ = (function name -> function pure -> let fn clt = TMetaField(name,pure,clt) in Hashtbl.replace metavariables (get_name name) fn); + Data.add_field_list_meta := + (function name -> function lenname -> function pure -> + let fn clt = TMetaFieldList(name,lenname,pure,clt) in + Hashtbl.replace metavariables (get_name name) fn); Data.add_stm_meta := (function name -> function pure -> let fn clt = TMetaStm(name,pure,clt) in @@ -505,20 +518,32 @@ rule token = parse | [' ' '\t' ]+ { start_line false; token lexbuf } - | "//" [^ '\n']* { + | [' ' '\t' ]* (("//" [^ '\n']*) as after) { match !current_line_type with (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> - TPragma (Ast.Indent (tok lexbuf), get_current_line_type lexbuf) + let str = + if !current_line_started + then (tok lexbuf) + else after in + start_line true; + TPragma (Ast.Indent str, get_current_line_type lexbuf) | _ -> start_line false; token lexbuf } + | "__attribute__" [' ' '\t']* "((" _* "))" + { match !current_line_type with + (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> + start_line true; + TPragma (Ast.Space (tok lexbuf), get_current_line_type lexbuf) + | _ -> failwith "attributes only allowedin + code" } + | "@@" { start_line true; TArobArob } | "@" { pass_zero(); if !Data.in_rule_name or not !current_line_started then (start_line true; TArob) else (check_minus_context_linetype "@"; TPArob) } - | "~=" { start_line true; TTildeEq (get_current_line_type lexbuf) } - | "!~=" { start_line true; TTildeExclEq (get_current_line_type lexbuf) } + | "=~" { start_line true; TTildeEq (get_current_line_type lexbuf) } + | "!=~" { start_line true; TTildeExclEq (get_current_line_type lexbuf) } | "WHEN" | "when" { start_line true; check_minus_context_linetype (tok lexbuf); TWhen (get_current_line_type lexbuf) } @@ -675,6 +700,17 @@ rule token = parse | "^" { start_line true; TXor(get_current_line_type lexbuf) } | "##" { start_line true; TCppConcatOp } + | (( ("#" [' ' '\t']* "undef" [' ' '\t']+)) as def) + ( (letter (letter |digit)*) as ident) + { start_line true; + let (arity,line,lline,offset,col,strbef,straft,pos) as lt = + get_current_line_type lexbuf in + let off = String.length def in + (* -1 in the code below because the ident is not at the line start *) + TUndef + (lt, + check_var ident + (arity,line,lline,offset+off,col+off,[],[],[])) } | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def) ( (letter (letter |digit)*) as ident) { start_line true; @@ -685,7 +721,7 @@ rule token = parse TDefine (lt, check_var ident - (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) } + (arity,line,lline,offset+off,col+off,[],[],[])) } | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def) ( (letter (letter | digit)*) as ident) '(' @@ -724,12 +760,16 @@ rule token = parse { start_line true; check_plus_linetype (tok lexbuf); TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) } | "/*" - { start_line true; check_plus_linetype (tok lexbuf); + { + match !current_line_type with + (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> + start_line true; (* second argument to TPragma is not quite right, because it represents only the first token of the comment, but that should be good enough *) - TPragma (Ast.Indent("/*"^(comment lexbuf)), - get_current_line_type lexbuf) } + TPragma (Ast.Indent("/*"^(comment check_comment lexbuf)), + get_current_line_type lexbuf) + | _ -> let _ = comment (fun _ -> ()) lexbuf in token lexbuf } | "---" [^'\n']* { (if !current_line_started then lexerr "--- must be at the beginning of the line" ""); @@ -775,20 +815,55 @@ rule token = parse and char = parse - | (_ as x) "'" { String.make 1 x } - | (("\\" (oct | oct oct | oct oct oct)) as x "'") { x } - | (("\\x" (hex | hex hex)) as x "'") { x } - | (("\\" (_ as v)) as x "'") - { (match v with - | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () - | 'r' -> () | 'f' -> () | 'a' -> () - | '\\' -> () | '?' -> () | '\'' -> () | '"' -> () - | 'e' -> () - | _ -> lexerr "unrecognised symbol: " (tok lexbuf) - ); - x + | (_ as x) { String.make 1 x ^ restchars lexbuf } + (* todo?: as for octal, do exception beyond radix exception ? *) + | (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf } + (* this rule must be after the one with octal, lex try first longest + * and when \7 we want an octal, not an exn. + *) + | (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf } + | (("\\" (_ as v)) as x ) + { + (match v with (* Machine specific ? *) + | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> () + | 'f' -> () | 'a' -> () + | '\\' -> () | '?' -> () | '\'' -> () | '"' -> () + | 'e' -> () (* linuxext: ? *) + | _ -> + Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf); + ); + x ^ restchars lexbuf } - | _ { lexerr "unrecognised symbol: " (tok lexbuf) } + | _ + { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf); + tok lexbuf ^ restchars lexbuf + } + +and restchars = parse + | "'" { "" } + | (_ as x) { String.make 1 x ^ restchars lexbuf } + (* todo?: as for octal, do exception beyond radix exception ? *) + | (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf } + (* this rule must be after the one with octal, lex try first longest + * and when \7 we want an octal, not an exn. + *) + | (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf } + | (("\\" (_ as v)) as x ) + { + (match v with (* Machine specific ? *) + | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> () + | 'f' -> () | 'a' -> () + | '\\' -> () | '?' -> () | '\'' -> () | '"' -> () + | 'e' -> () (* linuxext: ? *) + | _ -> + Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf); + ); + x ^ restchars lexbuf + } + | _ + { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf); + tok lexbuf ^ restchars lexbuf + } and string = parse | '"' { "" } @@ -810,27 +885,28 @@ and string = parse } | _ { lexerr "unrecognised symbol: " (tok lexbuf) } -and comment = parse +and comment check_comment = parse | "*/" { let s = tok lexbuf in check_comment s; start_line true; s } | ['\n' '\r' '\011' '\012'] { let s = tok lexbuf in (* even blank line should have a + *) check_comment s; - reset_line lexbuf; s ^ comment lexbuf } + reset_line lexbuf; s ^ comment check_comment lexbuf } | "+" { pass_zero(); if !current_line_started - then (start_line true; let s = tok lexbuf in s^(comment lexbuf)) - else (start_line true; comment lexbuf) } + then (start_line true; + let s = tok lexbuf in s^(comment check_comment lexbuf)) + else (start_line true; comment check_comment lexbuf) } (* noteopti: *) | [^ '*'] { let s = tok lexbuf in - check_comment s; start_line true; s ^ comment lexbuf } + check_comment s; start_line true; s ^ comment check_comment lexbuf } | [ '*'] { let s = tok lexbuf in - check_comment s; start_line true; s ^ comment lexbuf } + check_comment s; start_line true; s ^ comment check_comment lexbuf } | _ { start_line true; let s = tok lexbuf in Common.pr2 ("LEXER: unrecognised symbol in comment:"^s); - s ^ comment lexbuf + s ^ comment check_comment lexbuf }