Coccinelle release-1.0.0-rc11

[bpt/coccinelle.git] / parsing_cocci / lexer_cocci.mll
diff --git a/parsing_cocci/lexer_cocci.mll b/parsing_cocci/lexer_cocci.mll

index 14a62d9..d94c7d8 100644 (file)
--- a/parsing_cocci/lexer_cocci.mll
+++ b/parsing_cocci/lexer_cocci.mll
@@ -1,4 +1,8 @@
  (*
+ * Copyright 2012, INRIA
+ * Julia Lawall, Gilles Muller
+ * Copyright 2010-2011, INRIA, University of Copenhagen
+ * Julia Lawall, Rene Rydhof Hansen, Gilles Muller, Nicolas Palix
   * Copyright 2005-2009, Ecole des Mines de Nantes, University of Copenhagen
   * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
   * This file is part of Coccinelle.
@@ -50,10 +54,13 @@ let get_current_line_type lexbuf =
      if !line_start < 0 then 0 else lex_start - !line_start in
    (*line_start := -1;*)
    prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
-  (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos)
+  (c,l,ll,lex_start,preceeding_spaces,[],[],[])
  let current_line_started = ref false
  let col_zero = ref true
  
+let contextify (c,l,ll,lex_start,preceeding_spaces,bef,aft,pos) =
+  (D.CONTEXT,l,ll,lex_start,preceeding_spaces,bef,aft,pos)
+
  let reset_line lexbuf =
    line := !line + 1;
    current_line_type := (D.CONTEXT,!line,!logical_line);
@@ -116,6 +123,10 @@ let check_arity_context_linetype s =
    | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
    | _ -> lexerr "invalid in a nonempty context: " s
  
+let check_comment s =
+  if not !current_line_started
+  then lexerr "+ expected at the beginning of the line" s
+
  let process_include start finish str =
    (match !current_line_type with
      (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
@@ -133,15 +144,20 @@ let pm = ref UNKNOWN
  
  let patch_or_match = function
      PATCH ->
-      (match !pm with
-       MATCH -> lexerr "- or + not allowed in the first column for a match" ""
-      |        PATCH -> ()
-      |        UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
+      if not !D.ignore_patch_or_match
+      then
+       (match !pm with
+         MATCH ->
+           lexerr "- or + not allowed in the first column for a match" ""
+       | PATCH -> ()
+       | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
    | MATCH ->
-      (match !pm with
-       PATCH -> lexerr "* not allowed in the first column for a patch" ""
-      |        MATCH -> ()
-      |        UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
+      if not !D.ignore_patch_or_match
+      then
+       (match !pm with
+         PATCH -> lexerr "* not allowed in the first column for a patch" ""
+       | MATCH -> ()
+       | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
    | _ -> failwith "unexpected argument"
  
  (* ---------------------------------------------------------------------- *)
@@ -158,6 +174,8 @@ let declarer_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
  
  let iterator_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
  
+let symbol_names = (Hashtbl.create(15) : (string, D.clt -> token) Hashtbl.t)
+
  let rule_names = (Hashtbl.create(100) : (string, unit) Hashtbl.t)
  
  let check_var s linetype =
@@ -173,7 +191,10 @@ let check_var s linetype =
           (try (Hashtbl.find declarer_names s) linetype
           with Not_found ->
             (try (Hashtbl.find iterator_names s) linetype
-           with Not_found -> TIdent (s,linetype)))) in
+           with Not_found ->
+             (try (Hashtbl.find symbol_names s) linetype
+             with Not_found ->
+                TIdent (s,linetype))))) in
    if !Data.in_meta or !Data.in_rule_name
    then (try Hashtbl.find rule_names s; TRuleName s with Not_found -> fail())
    else fail()
@@ -186,7 +207,8 @@ let id_tokens lexbuf =
    let in_iso = !Data.in_iso in
    let in_prolog = !Data.in_prolog in
    match s with
-    "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier
+    "metavariable" when in_meta -> check_arity_context_linetype s; TMetavariable
+  | "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier
    | "type" when in_meta ->       check_arity_context_linetype s; TType
    | "parameter" when in_meta ->  check_arity_context_linetype s; TParameter
    | "constant"  when in_meta ->  check_arity_context_linetype s; TConstant
@@ -194,6 +216,10 @@ let id_tokens lexbuf =
        check_arity_context_linetype s; TGenerated
    | "expression" when in_meta || in_rule_name ->
        check_arity_context_linetype s; TExpression
+  | "declaration" when in_meta || in_rule_name ->
+      check_arity_context_linetype s; TDeclaration
+  | "field" when in_meta || in_rule_name ->
+      check_arity_context_linetype s; TField
    | "initialiser" when in_meta || in_rule_name ->
        check_arity_context_linetype s; TInitialiser
    | "initializer" when in_meta || in_rule_name ->
@@ -217,9 +243,12 @@ let id_tokens lexbuf =
        check_arity_context_linetype s; TContext
    | "error" when in_meta ->      check_arity_context_linetype s; TError
    | "words" when in_meta ->      check_context_linetype s; TWords
+  | "symbol" when in_meta ->     check_arity_context_linetype s; TSymbol
  
    | "using" when in_rule_name || in_prolog ->  check_context_linetype s; TUsing
-  | "virtual" when in_prolog ->  check_context_linetype s; TVirtual
+  | "virtual" when in_prolog or in_rule_name or in_meta ->
+      (* don't want to allow virtual as a rule name *)
+      check_context_linetype s; TVirtual
    | "disable" when in_rule_name ->  check_context_linetype s; TDisable
    | "extends" when in_rule_name -> check_context_linetype s; TExtends
    | "depends" when in_rule_name -> check_context_linetype s; TDepends
@@ -240,6 +269,9 @@ let id_tokens lexbuf =
    | "float" ->      Tfloat    linetype
    | "long" ->       Tlong     linetype
    | "void" ->       Tvoid     linetype
+  | "size_t" ->     Tsize_t   linetype
+  | "ssize_t" ->    Tssize_t  linetype
+  | "ptrdiff_t" ->  Tptrdiff_t linetype
    (* in_meta is only for the first keyword; drop it now to allow any type
       name *)
    | "struct" ->     Data.saw_struct := true; Tstruct   linetype
@@ -273,13 +305,16 @@ let id_tokens lexbuf =
  
    | "sizeof" ->     TSizeof   linetype
  
-  | "Expression"     -> TIsoExpression
-  | "ArgExpression"  -> TIsoArgExpression
-  | "TestExpression" -> TIsoTestExpression
-  | "Statement"      -> TIsoStatement
-  | "Declaration"    -> TIsoDeclaration
-  | "Type"           -> TIsoType
-  | "TopLevel"       -> TIsoTopLevel
+  | "Expression"       when !Data.in_iso -> TIsoExpression
+  | "ArgExpression"    when !Data.in_iso -> TIsoArgExpression
+  | "TestExpression"   when !Data.in_iso -> TIsoTestExpression
+  | "ToTestExpression" when !Data.in_iso -> TIsoToTestExpression
+  | "Statement"        when !Data.in_iso -> TIsoStatement
+  | "Declaration"      when !Data.in_iso -> TIsoDeclaration
+  | "Type"             when !Data.in_iso -> TIsoType
+  | "TopLevel"         when !Data.in_iso -> TIsoTopLevel
+
+  | "_" when !Data.in_meta -> TUnderscore
  
    | s -> check_var s linetype
  
@@ -292,6 +327,7 @@ let init _ =
    prev_plus := false;
    line_start := 0;
    current_line_started := false;
+  current_line_type := (D.CONTEXT,0,0);
    col_zero := true;
    pm := UNKNOWN;
    Data.in_rule_name := false;
@@ -306,14 +342,27 @@ let init _ =
    Hashtbl.clear rule_names;
    Hashtbl.clear iterator_names;
    Hashtbl.clear declarer_names;
+  Hashtbl.clear symbol_names;
    let get_name (_,x) = x in
+  Data.add_meta_meta :=
+    (fun name pure ->
+      let fn clt = TMeta(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
    Data.add_id_meta :=
      (fun name constraints pure ->
-      let fn clt = TMetaId(name,constraints,pure,clt) in
+      let fn clt = TMetaId(name,constraints,Ast.NoVal,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_virt_id_meta_found :=
+    (fun name vl ->
+      let fn clt = TIdent(vl,clt) in
+      Hashtbl.replace metavariables name fn);
+  Data.add_virt_id_meta_not_found :=
+    (fun name pure ->
+      let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast.NoVal,pure,clt) in
        Hashtbl.replace metavariables (get_name name) fn);
    Data.add_fresh_id_meta :=
-    (fun name ->
-      let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in
+    (fun name seed ->
+      let fn clt = TMetaId(name,Ast.IdNoConstraint,seed,Ast0.Impure,clt) in
        Hashtbl.replace metavariables (get_name name) fn);
    Data.add_type_meta :=
      (fun name pure ->
@@ -323,6 +372,10 @@ let init _ =
      (fun name pure ->
        let fn clt = TMetaInit(name,pure,clt) in
        Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_initlist_meta :=
+    (function name -> function lenname -> function pure ->
+      let fn clt = TMetaInitList(name,lenname,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
    Data.add_param_meta :=
      (function name -> function pure ->
        let fn clt = TMetaParam(name,pure,clt) in
@@ -355,6 +408,18 @@ let init _ =
      (function name -> function lenname -> function pure ->
        let fn clt = TMetaExpList(name,lenname,pure,clt) in
        Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_decl_meta :=
+    (function name -> function pure ->
+      let fn clt = TMetaDecl(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_field_meta :=
+    (function name -> function pure ->
+      let fn clt = TMetaField(name,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
+  Data.add_field_list_meta :=
+    (function name -> function lenname -> function pure ->
+      let fn clt = TMetaFieldList(name,lenname,pure,clt) in
+      Hashtbl.replace metavariables (get_name name) fn);
    Data.add_stm_meta :=
      (function name -> function pure ->
        let fn clt = TMetaStm(name,pure,clt) in
@@ -400,12 +465,23 @@ let init _ =
      (function name ->
        let fn clt = TIteratorId(name,clt) in
        Hashtbl.replace iterator_names name fn);
+  Data.add_symbol_meta :=
+    (function name ->
+      let fn clt = TSymId (name,clt) in
+      Hashtbl.replace symbol_names name fn);
    Data.init_rule := (function _ -> Hashtbl.clear metavariables);
    Data.install_bindings :=
      (function parent ->
        List.iter (function (name,fn) -> Hashtbl.add metavariables name fn)
         (Hashtbl.find all_metavariables parent))
  
+(* the following is needed to properly tokenize include files.  Because an
+include file is included after seeing a @, so current_line_started is true.
+Current_line_started is not important for parsing the name of a rule, so we
+don't have to reset this value to true after parsing an included file. *)
+let include_init _ =
+  current_line_started := false
+
  let drop_spaces s =
    let len = String.length s in
    let rec loop n =
@@ -441,19 +517,50 @@ let real = pent exp | ((pent? '.' pfract | pent '.' pfract? ) exp?)
  
  
  rule token = parse
-  | [' ' '\t'  ]+             { start_line false; token lexbuf }
-  | ['\n' '\r' '\011' '\012'] { reset_line lexbuf; token lexbuf }
+  | [' ' '\t']* ['\n' '\r' '\011' '\012']
+    { let cls = !current_line_started in
+
+      if not cls
+      then
+       begin
+         match !current_line_type with
+           (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+             let info = get_current_line_type lexbuf in
+             reset_line lexbuf;
+             TPragma (Ast.Noindent "", info)
+         | _ -> reset_line lexbuf; token lexbuf
+       end
+      else (reset_line lexbuf; token lexbuf) }
+
+  | [' ' '\t'  ]+  { start_line false; token lexbuf }
+
+  | [' ' '\t'  ]* (("//" [^ '\n']*) as after) {
+    match !current_line_type with
+      (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+       let str =
+         if !current_line_started
+         then (tok lexbuf)
+         else after in
+       start_line true;
+       TPragma (Ast.Indent str, get_current_line_type lexbuf)
+    | _ -> start_line false; token lexbuf }
  
-  | "//" [^ '\n']* { start_line false; token lexbuf }
+  | "__attribute__" [' ' '\t']* "((" _* "))"
+   { match !current_line_type with
+      (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+       start_line true;
+       TPragma (Ast.Space (tok lexbuf), get_current_line_type lexbuf)
+    | _ -> failwith "attributes only allowedin + code" }
  
    | "@@" { start_line true; TArobArob }
    | "@"  { pass_zero();
            if !Data.in_rule_name or not !current_line_started
            then (start_line true; TArob)
-          else (check_minus_context_linetype "@"; TPArob) }
+          else (check_minus_context_linetype "@";
+                TPArob (get_current_line_type lexbuf)) }
  
-  | "~="  { start_line true; TTildeEq (get_current_line_type lexbuf) }
-  | "!~=" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
+  | "=~"  { start_line true; TTildeEq (get_current_line_type lexbuf) }
+  | "!~" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
    | "WHEN" | "when"
        { start_line true; check_minus_context_linetype (tok lexbuf);
         TWhen (get_current_line_type lexbuf) }
@@ -474,9 +581,9 @@ rule token = parse
              TOEllipsis (get_current_line_type lexbuf) }
    | "...>" { start_line true; check_context_linetype (tok lexbuf);
              TCEllipsis (get_current_line_type lexbuf) }
-  | "<+..." { start_line true; check_context_linetype (tok lexbuf);
+  | "<+..." { start_line true; check_minus_context_linetype (tok lexbuf);
              TPOEllipsis (get_current_line_type lexbuf) }
-  | "...+>" { start_line true; check_context_linetype (tok lexbuf);
+  | "...+>" { start_line true; check_minus_context_linetype (tok lexbuf);
              TPCEllipsis (get_current_line_type lexbuf) }
  (*
    | "<ooo" { start_line true; check_context_linetype (tok lexbuf);
@@ -513,24 +620,27 @@ rule token = parse
            else if !Data.in_meta
           then TBang0
            else (add_current_line_type D.UNIQUE; token lexbuf) }
-  | "(" { if not !col_zero
+  | "(" { if !Data.in_meta or not !col_zero
           then (start_line true; TOPar (get_current_line_type lexbuf))
            else
              (start_line true; check_context_linetype (tok lexbuf);
              TOPar0 (get_current_line_type lexbuf))}
-  | "\\(" { start_line true; TOPar0 (get_current_line_type lexbuf) }
+  | "\\(" { start_line true;
+           TOPar0 (contextify(get_current_line_type lexbuf)) }
    | "|" { if not (!col_zero)
           then (start_line true; TOr(get_current_line_type lexbuf))
            else (start_line true;
                 check_context_linetype (tok lexbuf);
                 TMid0 (get_current_line_type lexbuf))}
-  | "\\|" { start_line true; TMid0 (get_current_line_type lexbuf) }
+  | "\\|" { start_line true;
+           TMid0 (contextify(get_current_line_type lexbuf)) }
    | ")" { if not !col_zero
           then (start_line true; TCPar (get_current_line_type lexbuf))
            else
              (start_line true; check_context_linetype (tok lexbuf);
              TCPar0 (get_current_line_type lexbuf))}
-  | "\\)" { start_line true; TCPar0 (get_current_line_type lexbuf) }
+  | "\\)" { start_line true;
+           TCPar0 (contextify(get_current_line_type lexbuf)) }
  
    | '[' { start_line true; TOCro (get_current_line_type lexbuf)   }
    | ']' { start_line true; TCCro (get_current_line_type lexbuf)   }
@@ -590,7 +700,9 @@ rule token = parse
    | ">="           { start_line true;
                      TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
    | "<="           { start_line true;
-                    TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
+                    if !Data.in_meta
+                    then TSub(get_current_line_type lexbuf)
+                    else TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
    | "<"            { start_line true;
                      TLogOp(Ast.Inf,get_current_line_type lexbuf) }
    | ">"            { start_line true;
@@ -600,14 +712,25 @@ rule token = parse
    | "||"           { start_line true; TOrLog  (get_current_line_type lexbuf) }
  
    | ">>"           { start_line true;
-                    TShOp(Ast.DecRight,get_current_line_type lexbuf) }
+                    TShROp(Ast.DecRight,get_current_line_type lexbuf) }
    | "<<"           { start_line true;
-                    TShOp(Ast.DecLeft,get_current_line_type lexbuf) }
+                    TShLOp(Ast.DecLeft,get_current_line_type lexbuf) }
  
    | "&"            { start_line true; TAnd    (get_current_line_type lexbuf) }
    | "^"            { start_line true; TXor(get_current_line_type lexbuf) }
  
    | "##"            { start_line true; TCppConcatOp }
+  | (( ("#" [' ' '\t']*  "undef" [' ' '\t']+)) as def)
+    ( (letter (letter |digit)*) as ident)
+      { start_line true;
+       let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
+         get_current_line_type lexbuf in
+       let off = String.length def in
+       (* -1 in the code below because the ident is not at the line start *)
+       TUndef
+         (lt,
+          check_var ident
+            (arity,line,lline,offset+off,col+off,[],[],[])) }
    | (( ("#" [' ' '\t']*  "define" [' ' '\t']+)) as def)
      ( (letter (letter |digit)*) as ident)
        { start_line true;
@@ -618,7 +741,7 @@ rule token = parse
         TDefine
           (lt,
            check_var ident
-            (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) }
+            (arity,line,lline,offset+off,col+off,[],[],[])) }
    | (( ("#" [' ' '\t']*  "define" [' ' '\t']+)) as def)
      ( (letter (letter | digit)*) as ident)
      '('
@@ -654,14 +777,21 @@ rule token = parse
    | "#" [' ' '\t']* "elif" [^'\n']*
    | "#" [' ' '\t']* "endif" [^'\n']*
    | "#" [' ' '\t']* "error" [^'\n']*
+  | "#" [' ' '\t']* "pragma" [^'\n']*
+  | "#" [' ' '\t']* "line" [^'\n']*
        { start_line true; check_plus_linetype (tok lexbuf);
-       TPragma (tok lexbuf, get_current_line_type lexbuf) }
+       TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) }
    | "/*"
-      { start_line true; check_plus_linetype (tok lexbuf);
+      {
+       match !current_line_type with
+        (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
+        start_line true;
         (* second argument to TPragma is not quite right, because
-          it represents only the first token of the comemnt, but that
+          it represents only the first token of the comment, but that
            should be good enough *)
-       TPragma ("/*"^(comment lexbuf), get_current_line_type lexbuf) }
+       TPragma (Ast.Indent("/*"^(comment check_comment lexbuf)),
+                get_current_line_type lexbuf)
+      |        _ -> let _ = comment (fun _ -> ()) lexbuf in token lexbuf }
    | "---" [^'\n']*
        { (if !current_line_started
        then lexerr "--- must be at the beginning of the line" "");
@@ -707,20 +837,55 @@ rule token = parse
  
  
  and char = parse
-  | (_ as x) "'"                                     { String.make 1 x }
-  | (("\\" (oct | oct oct | oct oct oct)) as x  "'") { x }
-  | (("\\x" (hex | hex hex)) as x  "'")       { x }
-  | (("\\" (_ as v)) as x "'")
-       { (match v with
-            | 'n' -> ()  | 't' -> ()   | 'v' -> ()  | 'b' -> ()
-           | 'r' -> ()  | 'f' -> () | 'a' -> ()
-           | '\\' -> () | '?'  -> () | '\'' -> ()  | '"' -> ()
-            | 'e' -> ()
-           | _ -> lexerr "unrecognised symbol: " (tok lexbuf)
-           );
-          x
+  | (_ as x)                           { String.make 1 x ^ restchars lexbuf }
+  (* todo?: as for octal, do exception  beyond radix exception ? *)
+  | (("\\" (oct | oct oct | oct oct oct)) as x     ) { x ^ restchars lexbuf }
+  (* this rule must be after the one with octal, lex try first longest
+   * and when \7  we want an octal, not an exn.
+   *)
+  | (("\\x" ((hex | hex hex))) as x           )      { x ^ restchars lexbuf }
+  | (("\\" (_ as v))           as x           )
+       {
+          (match v with (* Machine specific ? *)
+          | 'n' -> ()  | 't' -> ()   | 'v' -> ()  | 'b' -> () | 'r' -> ()
+          | 'f' -> () | 'a' -> ()
+         | '\\' -> () | '?'  -> () | '\'' -> ()  | '"' -> ()
+          | 'e' -> () (* linuxext: ? *)
+         | _ ->
+              Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+         );
+          x ^ restchars lexbuf
         }
-  | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
+  | _
+      { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+        tok lexbuf ^ restchars lexbuf
+      }
+
+and restchars = parse
+  | "'"                                { "" }
+  | (_ as x)                           { String.make 1 x ^ restchars lexbuf }
+  (* todo?: as for octal, do exception  beyond radix exception ? *)
+  | (("\\" (oct | oct oct | oct oct oct)) as x     ) { x ^ restchars lexbuf }
+  (* this rule must be after the one with octal, lex try first longest
+   * and when \7  we want an octal, not an exn.
+   *)
+  | (("\\x" ((hex | hex hex))) as x           )      { x ^ restchars lexbuf }
+  | (("\\" (_ as v))           as x           )
+       {
+          (match v with (* Machine specific ? *)
+          | 'n' -> ()  | 't' -> ()   | 'v' -> ()  | 'b' -> () | 'r' -> ()
+          | 'f' -> () | 'a' -> ()
+         | '\\' -> () | '?'  -> () | '\'' -> ()  | '"' -> ()
+          | 'e' -> () (* linuxext: ? *)
+         | _ ->
+              Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+         );
+          x ^ restchars lexbuf
+       }
+  | _
+      { Common.pr2 ("LEXER: unrecognised symbol in char:"^tok lexbuf);
+        tok lexbuf ^ restchars lexbuf
+      }
  
  and string  = parse
    | '"'                                       { "" }
@@ -730,31 +895,40 @@ and string  = parse
    | ("\\" (_ as v)) as x
         {
           (match v with
-         | 'n' -> ()  | 't' -> ()   | 'v' -> ()  | 'b' -> () | 'r' -> ()
-        | 'f' -> () | 'a' -> ()
-        | '\\' -> () | '?'  -> () | '\'' -> ()  | '"' -> ()
-         | 'e' -> ()
-         | '\n' -> ()
-         | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
+           | 'n' -> ()  | 't' -> ()   | 'v' -> ()  | 'b' -> () | 'r' -> ()
+           | 'f' -> () | 'a' -> ()
+           | '\\' -> () | '?'  -> () | '\'' -> ()  | '"' -> ()
+           | 'e' -> ()
+           | '\n' -> ()
+           | '(' -> () | '|' -> () | ')' -> ()
+           | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
          );
            x ^ string lexbuf
         }
    | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
  
-and comment = parse
-  | "*/"     { start_line true; tok lexbuf }
+and comment check_comment = parse
+  | "*/" { let s = tok lexbuf in check_comment s; start_line true; s }
    | ['\n' '\r' '\011' '\012']
-      { reset_line lexbuf; let s = tok lexbuf in s ^ comment lexbuf }
+      { let s = tok lexbuf in
+        (* even blank line should have a + *)
+        check_comment s;
+        reset_line lexbuf; s ^ comment check_comment lexbuf }
    | "+" { pass_zero();
           if !current_line_started
-         then (start_line true; let s = tok lexbuf in s^(comment lexbuf))
-         else comment lexbuf }
+         then (start_line true;
+               let s = tok lexbuf in s^(comment check_comment lexbuf))
+         else (start_line true; comment check_comment lexbuf) }
    (* noteopti: *)
-  | [^ '*'] { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
-  | [ '*']   { start_line true; let s = tok lexbuf in s ^ comment lexbuf }
+  | [^ '*']
+      { let s = tok lexbuf in
+        check_comment s; start_line true; s ^ comment check_comment lexbuf }
+  | [ '*']
+      { let s = tok lexbuf in
+        check_comment s; start_line true; s ^ comment check_comment lexbuf }
    | _
        { start_line true; let s = tok lexbuf in
          Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
-        s ^ comment lexbuf
+        s ^ comment check_comment lexbuf
        }