(*
-* Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
-* Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller
-* This file is part of Coccinelle.
-*
-* Coccinelle is free software: you can redistribute it and/or modify
-* it under the terms of the GNU General Public License as published by
-* the Free Software Foundation, according to version 2 of the License.
-*
-* Coccinelle is distributed in the hope that it will be useful,
-* but WITHOUT ANY WARRANTY; without even the implied warranty of
-* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-* GNU General Public License for more details.
-*
-* You should have received a copy of the GNU General Public License
-* along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
-*
-* The authors reserve the right to distribute this or future versions of
-* Coccinelle under other licenses.
-*)
+ * Copyright 2012, INRIA
+ * Julia Lawall, Gilles Muller
+ * Copyright 2010-2011, INRIA, University of Copenhagen
+ * Julia Lawall, Rene Rydhof Hansen, Gilles Muller, Nicolas Palix
+ * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
+ * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
+ * This file is part of Coccinelle.
+ *
+ * Coccinelle is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation, according to version 2 of the License.
+ *
+ * Coccinelle is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
+ *
+ * The authors reserve the right to distribute this or future versions of
+ * Coccinelle under other licenses.
+ *)
{
let lex_start = Lexing.lexeme_start lexbuf in
let preceeding_spaces =
if !line_start < 0 then 0 else lex_start - !line_start in
- line_start := -1;
- prev_plus := (c = D.PLUS);
- (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos)
+ (*line_start := -1;*)
+ prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
+ (c,l,ll,lex_start,preceeding_spaces,[],[],[])
let current_line_started = ref false
let col_zero = ref true
+let contextify (c,l,ll,lex_start,preceeding_spaces,bef,aft,pos) =
+ (D.CONTEXT,l,ll,lex_start,preceeding_spaces,bef,aft,pos)
+
let reset_line lexbuf =
line := !line + 1;
current_line_type := (D.CONTEXT,!line,!logical_line);
current_line_type := (D.OPTMINUS,ln,lln)
| (D.PLUS,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.PLUS,ln,lln)
+ | (D.PLUSPLUS,(D.CONTEXT,ln,lln)) ->
+ current_line_type := (D.PLUSPLUS,ln,lln)
| (D.UNIQUE,(D.CONTEXT,ln,lln)) ->
current_line_type := (D.UNIQUE,ln,lln)
| (D.OPT,(D.CONTEXT,ln,lln)) ->
let check_minus_context_linetype s =
match !current_line_type with
- (D.PLUS,_,_) -> lexerr "invalid in a + context: " s
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> lexerr "invalid in a + context: " s
| _ -> ()
let check_context_linetype s =
let check_plus_linetype s =
match !current_line_type with
- (D.PLUS,_,_) -> ()
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> ()
| _ -> lexerr "invalid in a non + context: " s
let check_arity_context_linetype s =
match !current_line_type with
- (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
+ (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.PLUSPLUS,_,_)
+ | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
| _ -> lexerr "invalid in a nonempty context: " s
+let check_comment s =
+ if not !current_line_started
+ then lexerr "+ expected at the beginning of the line" s
+
let process_include start finish str =
(match !current_line_type with
- (D.PLUS,_,_) ->
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
(try
let _ = Str.search_forward (Str.regexp "\\.\\.\\.") str start in
lexerr "... not allowed in + include" ""
let patch_or_match = function
PATCH ->
- (match !pm with
- MATCH -> lexerr "- or + not allowed in the first column for a match" ""
- | PATCH -> ()
- | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
+ if not !D.ignore_patch_or_match
+ then
+ (match !pm with
+ MATCH ->
+ lexerr "- or + not allowed in the first column for a match" ""
+ | PATCH -> ()
+ | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
| MATCH ->
- (match !pm with
- PATCH -> lexerr "* not allowed in the first column for a patch" ""
- | MATCH -> ()
- | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
+ if not !D.ignore_patch_or_match
+ then
+ (match !pm with
+ PATCH -> lexerr "* not allowed in the first column for a patch" ""
+ | MATCH -> ()
+ | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
| _ -> failwith "unexpected argument"
(* ---------------------------------------------------------------------- *)
let iterator_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
+let symbol_names = (Hashtbl.create(15) : (string, D.clt -> token) Hashtbl.t)
+
let rule_names = (Hashtbl.create(100) : (string, unit) Hashtbl.t)
let check_var s linetype =
(try (Hashtbl.find declarer_names s) linetype
with Not_found ->
(try (Hashtbl.find iterator_names s) linetype
- with Not_found -> TIdent (s,linetype)))) in
+ with Not_found ->
+ (try (Hashtbl.find symbol_names s) linetype
+ with Not_found ->
+ TIdent (s,linetype))))) in
if !Data.in_meta or !Data.in_rule_name
then (try Hashtbl.find rule_names s; TRuleName s with Not_found -> fail())
else fail()
let s = tok lexbuf in
let linetype = get_current_line_type lexbuf in
let in_rule_name = !Data.in_rule_name in
- let in_meta = !Data.in_meta in
+ let in_meta = !Data.in_meta && not !Data.saw_struct in
let in_iso = !Data.in_iso in
let in_prolog = !Data.in_prolog in
match s with
- "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier
+ "metavariable" when in_meta -> check_arity_context_linetype s; TMetavariable
+ | "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier
| "type" when in_meta -> check_arity_context_linetype s; TType
| "parameter" when in_meta -> check_arity_context_linetype s; TParameter
| "constant" when in_meta -> check_arity_context_linetype s; TConstant
check_arity_context_linetype s; TGenerated
| "expression" when in_meta || in_rule_name ->
check_arity_context_linetype s; TExpression
+ | "declaration" when in_meta || in_rule_name ->
+ check_arity_context_linetype s; TDeclaration
+ | "field" when in_meta || in_rule_name ->
+ check_arity_context_linetype s; TField
| "initialiser" when in_meta || in_rule_name ->
check_arity_context_linetype s; TInitialiser
| "initializer" when in_meta || in_rule_name ->
check_arity_context_linetype s; TContext
| "error" when in_meta -> check_arity_context_linetype s; TError
| "words" when in_meta -> check_context_linetype s; TWords
+ | "symbol" when in_meta -> check_arity_context_linetype s; TSymbol
| "using" when in_rule_name || in_prolog -> check_context_linetype s; TUsing
+ | "virtual" when in_prolog or in_rule_name or in_meta ->
+ (* don't want to allow virtual as a rule name *)
+ check_context_linetype s; TVirtual
| "disable" when in_rule_name -> check_context_linetype s; TDisable
| "extends" when in_rule_name -> check_context_linetype s; TExtends
| "depends" when in_rule_name -> check_context_linetype s; TDepends
| "on" when in_rule_name -> check_context_linetype s; TOn
| "ever" when in_rule_name -> check_context_linetype s; TEver
| "never" when in_rule_name -> check_context_linetype s; TNever
+ (* exists and forall for when are reparsed in parse_cocci.ml *)
| "exists" when in_rule_name -> check_context_linetype s; TExists
| "forall" when in_rule_name -> check_context_linetype s; TForall
- | "reverse" when in_rule_name -> check_context_linetype s; TReverse
- | "script" when in_rule_name -> check_context_linetype s; TScript
+ | "script" when in_rule_name -> check_context_linetype s; TScript
+ | "initialize" when in_rule_name -> check_context_linetype s; TInitialize
+ | "finalize" when in_rule_name -> check_context_linetype s; TFinalize
| "char" -> Tchar linetype
| "short" -> Tshort linetype
| "float" -> Tfloat linetype
| "long" -> Tlong linetype
| "void" -> Tvoid linetype
- | "struct" -> Tstruct linetype
- | "union" -> Tunion linetype
- | "enum" -> Tenum linetype
+ | "size_t" -> Tsize_t linetype
+ | "ssize_t" -> Tssize_t linetype
+ | "ptrdiff_t" -> Tptrdiff_t linetype
+ (* in_meta is only for the first keyword; drop it now to allow any type
+ name *)
+ | "struct" -> Data.saw_struct := true; Tstruct linetype
+ | "union" -> Data.saw_struct := true; Tunion linetype
+ | "enum" -> Data.saw_struct := true; Tenum linetype
| "unsigned" -> Tunsigned linetype
| "signed" -> Tsigned linetype
| "sizeof" -> TSizeof linetype
- | "Expression" -> TIsoExpression
- | "ArgExpression" -> TIsoArgExpression
- | "TestExpression" -> TIsoTestExpression
- | "Statement" -> TIsoStatement
- | "Declaration" -> TIsoDeclaration
- | "Type" -> TIsoType
- | "TopLevel" -> TIsoTopLevel
+ | "Expression" when !Data.in_iso -> TIsoExpression
+ | "ArgExpression" when !Data.in_iso -> TIsoArgExpression
+ | "TestExpression" when !Data.in_iso -> TIsoTestExpression
+ | "ToTestExpression" when !Data.in_iso -> TIsoToTestExpression
+ | "Statement" when !Data.in_iso -> TIsoStatement
+ | "Declaration" when !Data.in_iso -> TIsoDeclaration
+ | "Type" when !Data.in_iso -> TIsoType
+ | "TopLevel" when !Data.in_iso -> TIsoTopLevel
+
+ | "_" when !Data.in_meta -> TUnderscore
| s -> check_var s linetype
prev_plus := false;
line_start := 0;
current_line_started := false;
+ current_line_type := (D.CONTEXT,0,0);
col_zero := true;
pm := UNKNOWN;
Data.in_rule_name := false;
Data.in_meta := false;
Data.in_prolog := false;
+ Data.saw_struct := false;
Data.inheritable_positions := [];
Hashtbl.clear all_metavariables;
Hashtbl.clear Data.all_metadecls;
Hashtbl.clear metavariables;
Hashtbl.clear type_names;
Hashtbl.clear rule_names;
+ Hashtbl.clear iterator_names;
+ Hashtbl.clear declarer_names;
+ Hashtbl.clear symbol_names;
let get_name (_,x) = x in
+ Data.add_meta_meta :=
+ (fun name pure ->
+ let fn clt = TMeta(name,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
Data.add_id_meta :=
(fun name constraints pure ->
- let fn clt = TMetaId(name,constraints,pure,clt) in
+ let fn clt = TMetaId(name,constraints,Ast.NoVal,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
+ Data.add_virt_id_meta_found :=
+ (fun name vl ->
+ let fn clt = TIdent(vl,clt) in
+ Hashtbl.replace metavariables name fn);
+ Data.add_virt_id_meta_not_found :=
+ (fun name pure ->
+ let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast.NoVal,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
+ Data.add_fresh_id_meta :=
+ (fun name seed ->
+ let fn clt = TMetaId(name,Ast.IdNoConstraint,seed,Ast0.Impure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
Data.add_type_meta :=
(fun name pure ->
(fun name pure ->
let fn clt = TMetaInit(name,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
+ Data.add_initlist_meta :=
+ (function name -> function lenname -> function pure ->
+ let fn clt = TMetaInitList(name,lenname,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
Data.add_param_meta :=
(function name -> function pure ->
let fn clt = TMetaParam(name,pure,clt) in
(function name -> function lenname -> function pure ->
let fn clt = TMetaExpList(name,lenname,pure,clt) in
Hashtbl.replace metavariables (get_name name) fn);
+ Data.add_decl_meta :=
+ (function name -> function pure ->
+ let fn clt = TMetaDecl(name,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
+ Data.add_field_meta :=
+ (function name -> function pure ->
+ let fn clt = TMetaField(name,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
+ Data.add_field_list_meta :=
+ (function name -> function lenname -> function pure ->
+ let fn clt = TMetaFieldList(name,lenname,pure,clt) in
+ Hashtbl.replace metavariables (get_name name) fn);
Data.add_stm_meta :=
(function name -> function pure ->
let fn clt = TMetaStm(name,pure,clt) in
(function name ->
let fn clt = TIteratorId(name,clt) in
Hashtbl.replace iterator_names name fn);
+ Data.add_symbol_meta :=
+ (function name ->
+ let fn clt = TSymId (name,clt) in
+ Hashtbl.replace symbol_names name fn);
Data.init_rule := (function _ -> Hashtbl.clear metavariables);
Data.install_bindings :=
(function parent ->
List.iter (function (name,fn) -> Hashtbl.add metavariables name fn)
(Hashtbl.find all_metavariables parent))
+(* The following is needed to properly tokenize include files. An include
+file is included after seeing a @, so current_line_started is true.
+Current_line_started is not important for parsing the name of a rule, so we
+don't have to reset this value to true after parsing an included file. *)
+let include_init _ =
+ current_line_started := false
+
let drop_spaces s =
let len = String.length s in
let rec loop n =
rule token = parse
- | [' ' '\t' ]+ { start_line false; token lexbuf }
- | ['\n' '\r' '\011' '\012'] { reset_line lexbuf; token lexbuf }
+ | [' ' '\t']* ['\n' '\r' '\011' '\012']
+ { let cls = !current_line_started in
+
+ if not cls
+ then
+ begin
+ match !current_line_type with
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+ let info = get_current_line_type lexbuf in
+ reset_line lexbuf;
+ TPragma (Ast.Noindent "", info)
+ | _ -> reset_line lexbuf; token lexbuf
+ end
+ else (reset_line lexbuf; token lexbuf) }
+
+ | [' ' '\t' ]+ { start_line false; token lexbuf }
+
+ | [' ' '\t' ]* (("//" [^ '\n']*) as after) {
+ match !current_line_type with
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+ let str =
+ if !current_line_started
+ then (tok lexbuf)
+ else after in
+ start_line true;
+ TPragma (Ast.Indent str, get_current_line_type lexbuf)
+ | _ -> start_line false; token lexbuf }
- | "//" [^ '\n']* { start_line false; token lexbuf }
+ | "__attribute__" [' ' '\t']* "((" _* "))"
+ { match !current_line_type with
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+ start_line true;
+ TPragma (Ast.Space (tok lexbuf), get_current_line_type lexbuf)
| _ -> failwith "attributes only allowed in + code" }
| "@@" { start_line true; TArobArob }
| "@" { pass_zero();
if !Data.in_rule_name or not !current_line_started
then (start_line true; TArob)
- else (check_minus_context_linetype "@"; TPArob) }
+ else (check_minus_context_linetype "@";
+ TPArob (get_current_line_type lexbuf)) }
+ | "=~" { start_line true; TTildeEq (get_current_line_type lexbuf) }
+ | "!~" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
| "WHEN" | "when"
{ start_line true; check_minus_context_linetype (tok lexbuf);
TWhen (get_current_line_type lexbuf) }
TOEllipsis (get_current_line_type lexbuf) }
| "...>" { start_line true; check_context_linetype (tok lexbuf);
TCEllipsis (get_current_line_type lexbuf) }
- | "<+..." { start_line true; check_context_linetype (tok lexbuf);
+ | "<+..." { start_line true; check_minus_context_linetype (tok lexbuf);
TPOEllipsis (get_current_line_type lexbuf) }
- | "...+>" { start_line true; check_context_linetype (tok lexbuf);
+ | "...+>" { start_line true; check_minus_context_linetype (tok lexbuf);
TPCEllipsis (get_current_line_type lexbuf) }
(*
| "<ooo" { start_line true; check_context_linetype (tok lexbuf);
else if !Data.in_meta
then TBang0
else (add_current_line_type D.UNIQUE; token lexbuf) }
- | "(" { if not !col_zero
+ | "(" { if !Data.in_meta or not !col_zero
then (start_line true; TOPar (get_current_line_type lexbuf))
else
(start_line true; check_context_linetype (tok lexbuf);
TOPar0 (get_current_line_type lexbuf))}
- | "\\(" { start_line true; TOPar0 (get_current_line_type lexbuf) }
+ | "\\(" { start_line true;
+ TOPar0 (contextify(get_current_line_type lexbuf)) }
| "|" { if not (!col_zero)
then (start_line true; TOr(get_current_line_type lexbuf))
else (start_line true;
check_context_linetype (tok lexbuf);
TMid0 (get_current_line_type lexbuf))}
- | "\\|" { start_line true; TMid0 (get_current_line_type lexbuf) }
+ | "\\|" { start_line true;
+ TMid0 (contextify(get_current_line_type lexbuf)) }
| ")" { if not !col_zero
then (start_line true; TCPar (get_current_line_type lexbuf))
else
(start_line true; check_context_linetype (tok lexbuf);
TCPar0 (get_current_line_type lexbuf))}
- | "\\)" { start_line true; TCPar0 (get_current_line_type lexbuf) }
+ | "\\)" { start_line true;
+ TCPar0 (contextify(get_current_line_type lexbuf)) }
| '[' { start_line true; TOCro (get_current_line_type lexbuf) }
| ']' { start_line true; TCCro (get_current_line_type lexbuf) }
TDmOp (Ast.Mod,get_current_line_type lexbuf) }
| '~' { start_line true; TTilde (get_current_line_type lexbuf) }
- | "++" { start_line true; TInc (get_current_line_type lexbuf) }
+ | "++" { pass_zero();
+ if !current_line_started
+ then
+ (start_line true; TInc (get_current_line_type lexbuf))
+ else (patch_or_match PATCH;
+ add_current_line_type D.PLUSPLUS; token lexbuf) }
| "--" { start_line true; TDec (get_current_line_type lexbuf) }
| "=" { start_line true; TEq (get_current_line_type lexbuf) }
| ":" { start_line true; TDotDot (get_current_line_type lexbuf) }
- | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
- | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
+ | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
+ | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
| ">=" { start_line true;
TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
| "<=" { start_line true;
- TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
+ if !Data.in_meta
+ then TSub(get_current_line_type lexbuf)
+ else TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
| "<" { start_line true;
TLogOp(Ast.Inf,get_current_line_type lexbuf) }
| ">" { start_line true;
| "||" { start_line true; TOrLog (get_current_line_type lexbuf) }
| ">>" { start_line true;
- TShOp(Ast.DecRight,get_current_line_type lexbuf) }
+ TShROp(Ast.DecRight,get_current_line_type lexbuf) }
| "<<" { start_line true;
- TShOp(Ast.DecLeft,get_current_line_type lexbuf) }
+ TShLOp(Ast.DecLeft,get_current_line_type lexbuf) }
| "&" { start_line true; TAnd (get_current_line_type lexbuf) }
| "^" { start_line true; TXor(get_current_line_type lexbuf) }
- | ( ("#" [' ' '\t']* "define" [' ' '\t']+))
+ | "##" { start_line true; TCppConcatOp }
+ | (( ("#" [' ' '\t']* "undef" [' ' '\t']+)) as def)
( (letter (letter |digit)*) as ident)
{ start_line true;
let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
get_current_line_type lexbuf in
- let off = String.length "#define " in
+ let off = String.length def in
+ (* offset and col are shifted by off below because the ident is not at the line start *)
+ TUndef
+ (lt,
+ check_var ident
+ (arity,line,lline,offset+off,col+off,[],[],[])) }
+ | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
+ ( (letter (letter |digit)*) as ident)
+ { start_line true;
+ let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
+ get_current_line_type lexbuf in
+ let off = String.length def in
(* offset and col are shifted by off below because the ident is not at the line start *)
TDefine
(lt,
check_var ident
- (arity,line,lline,offset+off,(-1),[],[],Ast0.NoMetaPos)) }
- | ( ("#" [' ' '\t']* "define" [' ' '\t']+))
+ (arity,line,lline,offset+off,col+off,[],[],[])) }
+ | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
( (letter (letter | digit)*) as ident)
'('
{ start_line true;
let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
get_current_line_type lexbuf in
- let off = String.length "#define " in
+ let off = String.length def in
TDefineParam
(lt,
check_var ident
(* why pos here but not above? *)
- (arity,line,lline,offset+off,(-1),strbef,straft,pos),
- offset + off + (String.length ident)) }
+ (arity,line,lline,offset+off,col+off,strbef,straft,pos),
+ offset + off + (String.length ident),
+ col + off + (String.length ident)) }
| "#" [' ' '\t']* "include" [' ' '\t']* '"' [^ '"']+ '"'
{ TIncludeL
(let str = tok lexbuf in
| "#" [' ' '\t']* "elif" [^'\n']*
| "#" [' ' '\t']* "endif" [^'\n']*
| "#" [' ' '\t']* "error" [^'\n']*
+ | "#" [' ' '\t']* "pragma" [^'\n']*
+ | "#" [' ' '\t']* "line" [^'\n']*
{ start_line true; check_plus_linetype (tok lexbuf);
- TPragma (tok lexbuf, get_current_line_type lexbuf) }
+ TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) }
| "/*"
- { start_line true; check_plus_linetype (tok lexbuf);
+ {
+ match !current_line_type with
+ (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+ start_line true;
(* second argument to TPragma is not quite right, because
- it represents only the first token of the comemnt, but that
+ it represents only the first token of the comment, but that
should be good enough *)
- TPragma ("/*"^(comment lexbuf), get_current_line_type lexbuf) }
+ TPragma (Ast.Indent("/*"^(comment check_comment lexbuf)),
+ get_current_line_type lexbuf)
+ | _ -> let _ = comment (fun _ -> ()) lexbuf in token lexbuf }
| "---" [^'\n']*
{ (if !current_line_started
then lexerr "--- must be at the beginning of the line" "");
and char = parse
- | (_ as x) "'" { String.make 1 x }
- | (("\\" (oct | oct oct | oct oct oct)) as x "'") { x }
- | (("\\x" (hex | hex hex)) as x "'") { x }
- | (("\\" (_ as v)) as x "'")
- { (match v with
- | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> ()
- | 'r' -> () | 'f' -> () | 'a' -> ()
- | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
- | 'e' -> ()
- | _ -> lexerr "unrecognised symbol: " (tok lexbuf)
- );
- x
+ | (_ as x) { String.make 1 x ^ restchars lexbuf }
+ (* todo?: as for octal, do exception beyond radix exception ? *)
+ | (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf }
+ (* this rule must be after the one with octal, lex try first longest
+ * and when \7 we want an octal, not an exn.
+ *)
+ | (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf }
+ | (("\\" (_ as v)) as x )
+ {
+ (match v with (* Machine specific ? *)
+ | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+ | 'f' -> () | 'a' -> ()
+ | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+ | 'e' -> () (* linuxext: ? *)
+ | _ ->
+ Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+ );
+ x ^ restchars lexbuf
}
- | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
+ | _
+ { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+ tok lexbuf ^ restchars lexbuf
+ }
+
+and restchars = parse
+ | "'" { "" }
+ | (_ as x) { String.make 1 x ^ restchars lexbuf }
+ (* todo?: as for octal, do exception beyond radix exception ? *)
+ | (("\\" (oct | oct oct | oct oct oct)) as x ) { x ^ restchars lexbuf }
+ (* this rule must be after the one with octal, lex try first longest
+ * and when \7 we want an octal, not an exn.
+ *)
+ | (("\\x" ((hex | hex hex))) as x ) { x ^ restchars lexbuf }
+ | (("\\" (_ as v)) as x )
+ {
+ (match v with (* Machine specific ? *)
+ | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+ | 'f' -> () | 'a' -> ()
+ | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+ | 'e' -> () (* linuxext: ? *)
+ | _ ->
+ Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+ );
+ x ^ restchars lexbuf
+ }
+ | _
+ { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+ tok lexbuf ^ restchars lexbuf
+ }
and string = parse
| '"' { "" }
| ("\\" (_ as v)) as x
{
(match v with
- | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
- | 'f' -> () | 'a' -> ()
- | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
- | 'e' -> ()
- | '\n' -> ()
- | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
+ | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
+ | 'f' -> () | 'a' -> ()
+ | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
+ | 'e' -> ()
+ | '\n' -> ()
+ | '(' -> () | '|' -> () | ')' -> ()
+ | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
);
x ^ string lexbuf
}
| _ { lexerr "unrecognised symbol: " (tok lexbuf) }
-and comment = parse
- | "*/" { start_line true; tok lexbuf }
+and comment check_comment = parse
+ | "*/" { let s = tok lexbuf in check_comment s; start_line true; s }
| ['\n' '\r' '\011' '\012']
- { reset_line lexbuf; let s = tok lexbuf in s ^ comment lexbuf }
+ { let s = tok lexbuf in
+ (* even blank line should have a + *)
+ check_comment s;
+ reset_line lexbuf; s ^ comment check_comment lexbuf }
| "+" { pass_zero();
if !current_line_started
- then (start_line true; let s = tok lexbuf in s^(comment lexbuf))
- else comment lexbuf }
+ then (start_line true;
+ let s = tok lexbuf in s^(comment check_comment lexbuf))
+ else (start_line true; comment check_comment lexbuf) }
(* noteopti: *)
- | [^ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
- | [ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
- | _
+ | [^ '*']
+ { let s = tok lexbuf in
+ check_comment s; start_line true; s ^ comment check_comment lexbuf }
+ | [ '*']
+ { let s = tok lexbuf in
+ check_comment s; start_line true; s ^ comment check_comment lexbuf }
+ | _
{ start_line true; let s = tok lexbuf in
Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
- s ^ comment lexbuf
+ s ^ comment check_comment lexbuf
}