Release coccinelle-0.2.2
[bpt/coccinelle.git] / parsing_cocci / lexer_cocci.mll
CommitLineData
9f8e26f4 1(*
ae4735db 2 * Copyright 2005-2010, Ecole des Mines de Nantes, University of Copenhagen
9f8e26f4
C
3 * Yoann Padioleau, Julia Lawall, Rene Rydhof Hansen, Henrik Stuart, Gilles Muller, Nicolas Palix
4 * This file is part of Coccinelle.
5 *
6 * Coccinelle is free software: you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation, according to version 2 of the License.
9 *
10 * Coccinelle is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with Coccinelle. If not, see <http://www.gnu.org/licenses/>.
17 *
18 * The authors reserve the right to distribute this or future versions of
19 * Coccinelle under other licenses.
20 *)
21
22
34e49164
C
23{
24open Parser_cocci_menhir
25module D = Data
26module Ast = Ast_cocci
27module Ast0 = Ast0_cocci
28module P = Parse_aux
29exception Lexical of string
30let tok = Lexing.lexeme
31
32let line = ref 1
33let logical_line = ref 0
34
35(* ---------------------------------------------------------------------- *)
36(* control codes *)
37
38(* Defined in data.ml
39type line_type = MINUS | OPTMINUS | UNIQUEMINUS | PLUS | CONTEXT | UNIQUE | OPT
40*)
41
42let current_line_type = ref (D.CONTEXT,!line,!logical_line)
43
44let prev_plus = ref false
45let line_start = ref 0 (* offset of the beginning of the line *)
46let get_current_line_type lexbuf =
47 let (c,l,ll) = !current_line_type in
48 let lex_start = Lexing.lexeme_start lexbuf in
49 let preceeding_spaces =
50 if !line_start < 0 then 0 else lex_start - !line_start in
708f4980 51 (*line_start := -1;*)
951c7801 52 prev_plus := (c = D.PLUS) or (c = D.PLUSPLUS);
34e49164
C
53 (c,l,ll,lex_start,preceeding_spaces,[],[],Ast0.NoMetaPos)
54let current_line_started = ref false
55let col_zero = ref true
56
57let reset_line lexbuf =
58 line := !line + 1;
59 current_line_type := (D.CONTEXT,!line,!logical_line);
60 current_line_started := false;
61 col_zero := true;
62 line_start := Lexing.lexeme_start lexbuf + 1
63
64let started_line = ref (-1)
65
66let start_line seen_char =
67 current_line_started := true;
68 col_zero := false;
69 (if seen_char && not(!line = !started_line)
70 then
71 begin
72 started_line := !line;
73 logical_line := !logical_line + 1
74 end)
75
76let pass_zero _ = col_zero := false
77
78let lexerr s1 s2 = raise (Lexical (Printf.sprintf "%s%s" s1 s2))
79
80let add_current_line_type x =
81 match (x,!current_line_type) with
82 (D.MINUS,(D.CONTEXT,ln,lln)) ->
83 current_line_type := (D.MINUS,ln,lln)
84 | (D.MINUS,(D.UNIQUE,ln,lln)) ->
85 current_line_type := (D.UNIQUEMINUS,ln,lln)
86 | (D.MINUS,(D.OPT,ln,lln)) ->
87 current_line_type := (D.OPTMINUS,ln,lln)
88 | (D.PLUS,(D.CONTEXT,ln,lln)) ->
89 current_line_type := (D.PLUS,ln,lln)
951c7801
C
90 | (D.PLUSPLUS,(D.CONTEXT,ln,lln)) ->
91 current_line_type := (D.PLUSPLUS,ln,lln)
34e49164
C
92 | (D.UNIQUE,(D.CONTEXT,ln,lln)) ->
93 current_line_type := (D.UNIQUE,ln,lln)
94 | (D.OPT,(D.CONTEXT,ln,lln)) ->
95 current_line_type := (D.OPT,ln,lln)
96 | _ -> lexerr "invalid control character combination" ""
97
98let check_minus_context_linetype s =
99 match !current_line_type with
951c7801 100 (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> lexerr "invalid in a + context: " s
34e49164
C
101 | _ -> ()
102
103let check_context_linetype s =
104 match !current_line_type with
105 (D.CONTEXT,_,_) -> ()
106 | _ -> lexerr "invalid in a nonempty context: " s
107
108let check_plus_linetype s =
109 match !current_line_type with
951c7801 110 (D.PLUS,_,_) | (D.PLUSPLUS,_,_) -> ()
34e49164
C
111 | _ -> lexerr "invalid in a non + context: " s
112
113let check_arity_context_linetype s =
114 match !current_line_type with
951c7801
C
115 (D.CONTEXT,_,_) | (D.PLUS,_,_) | (D.PLUSPLUS,_,_)
116 | (D.UNIQUE,_,_) | (D.OPT,_,_) -> ()
34e49164
C
117 | _ -> lexerr "invalid in a nonempty context: " s
118
aa721442
C
119let check_comment s =
120 if not !current_line_started
121 then lexerr "+ expected at the beginning of the line" s
122
34e49164
C
123let process_include start finish str =
124 (match !current_line_type with
951c7801 125 (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
34e49164
C
126 (try
127 let _ = Str.search_forward (Str.regexp "\\.\\.\\.") str start in
128 lexerr "... not allowed in + include" ""
129 with Not_found -> ())
130 | _ -> ());
131 String.sub str (start + 1) (finish - start - 1)
132
133(* ---------------------------------------------------------------------- *)
134type pm = PATCH | MATCH | UNKNOWN
135
136let pm = ref UNKNOWN
137
138let patch_or_match = function
139 PATCH ->
7f004419
C
140 if not !D.ignore_patch_or_match
141 then
142 (match !pm with
143 MATCH ->
144 lexerr "- or + not allowed in the first column for a match" ""
145 | PATCH -> ()
146 | UNKNOWN -> Flag.sgrep_mode2 := false; pm := PATCH)
34e49164 147 | MATCH ->
7f004419
C
148 if not !D.ignore_patch_or_match
149 then
150 (match !pm with
151 PATCH -> lexerr "* not allowed in the first column for a patch" ""
152 | MATCH -> ()
153 | UNKNOWN -> Flag.sgrep_mode2 := true; pm := MATCH)
34e49164
C
154 | _ -> failwith "unexpected argument"
155
156(* ---------------------------------------------------------------------- *)
157(* identifiers, including metavariables *)
158
159let metavariables = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
160
161let all_metavariables =
162 (Hashtbl.create(100) : (string,(string * (D.clt -> token)) list) Hashtbl.t)
163
164let type_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
165
166let declarer_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
167
168let iterator_names = (Hashtbl.create(100) : (string, D.clt -> token) Hashtbl.t)
169
170let rule_names = (Hashtbl.create(100) : (string, unit) Hashtbl.t)
171
172let check_var s linetype =
173 let fail _ =
174 if (!Data.in_prolog || !Data.in_rule_name) &&
175 Str.string_match (Str.regexp "<.*>") s 0
176 then TPathIsoFile s
177 else
178 try (Hashtbl.find metavariables s) linetype
179 with Not_found ->
180 (try (Hashtbl.find type_names s) linetype
181 with Not_found ->
182 (try (Hashtbl.find declarer_names s) linetype
faf9a90c 183 with Not_found ->
34e49164
C
184 (try (Hashtbl.find iterator_names s) linetype
185 with Not_found -> TIdent (s,linetype)))) in
186 if !Data.in_meta or !Data.in_rule_name
187 then (try Hashtbl.find rule_names s; TRuleName s with Not_found -> fail())
188 else fail()
189
190let id_tokens lexbuf =
191 let s = tok lexbuf in
192 let linetype = get_current_line_type lexbuf in
193 let in_rule_name = !Data.in_rule_name in
978fd7e5 194 let in_meta = !Data.in_meta && not !Data.saw_struct in
34e49164
C
195 let in_iso = !Data.in_iso in
196 let in_prolog = !Data.in_prolog in
197 match s with
198 "identifier" when in_meta -> check_arity_context_linetype s; TIdentifier
199 | "type" when in_meta -> check_arity_context_linetype s; TType
200 | "parameter" when in_meta -> check_arity_context_linetype s; TParameter
201 | "constant" when in_meta -> check_arity_context_linetype s; TConstant
faf9a90c
C
202 | "generated" when in_rule_name && not (!Flag.make_hrule = None) ->
203 check_arity_context_linetype s; TGenerated
34e49164
C
204 | "expression" when in_meta || in_rule_name ->
205 check_arity_context_linetype s; TExpression
113803cf
C
206 | "initialiser" when in_meta || in_rule_name ->
207 check_arity_context_linetype s; TInitialiser
208 | "initializer" when in_meta || in_rule_name ->
209 check_arity_context_linetype s; TInitialiser
34e49164
C
210 | "idexpression" when in_meta ->
211 check_arity_context_linetype s; TIdExpression
212 | "statement" when in_meta -> check_arity_context_linetype s; TStatement
213 | "function" when in_meta -> check_arity_context_linetype s; TFunction
214 | "local" when in_meta -> check_arity_context_linetype s; TLocal
215 | "list" when in_meta -> check_arity_context_linetype s; Tlist
216 | "fresh" when in_meta -> check_arity_context_linetype s; TFresh
217 | "typedef" when in_meta -> check_arity_context_linetype s; TTypedef
218 | "declarer" when in_meta -> check_arity_context_linetype s; TDeclarer
219 | "iterator" when in_meta -> check_arity_context_linetype s; TIterator
220 | "name" when in_meta -> check_arity_context_linetype s; TName
221 | "position" when in_meta -> check_arity_context_linetype s; TPosition
222 | "any" when in_meta -> check_arity_context_linetype s; TPosAny
223 | "pure" when in_meta && in_iso ->
224 check_arity_context_linetype s; TPure
225 | "context" when in_meta && in_iso ->
226 check_arity_context_linetype s; TContext
227 | "error" when in_meta -> check_arity_context_linetype s; TError
228 | "words" when in_meta -> check_context_linetype s; TWords
229
230 | "using" when in_rule_name || in_prolog -> check_context_linetype s; TUsing
ae4735db
C
231 | "virtual" when in_prolog or in_rule_name or in_meta ->
232 (* don't want to allow virtual as a rule name *)
233 check_context_linetype s; TVirtual
34e49164
C
234 | "disable" when in_rule_name -> check_context_linetype s; TDisable
235 | "extends" when in_rule_name -> check_context_linetype s; TExtends
236 | "depends" when in_rule_name -> check_context_linetype s; TDepends
237 | "on" when in_rule_name -> check_context_linetype s; TOn
238 | "ever" when in_rule_name -> check_context_linetype s; TEver
239 | "never" when in_rule_name -> check_context_linetype s; TNever
978fd7e5 240 (* exists and forall for when are reparsed in parse_cocci.ml *)
34e49164
C
241 | "exists" when in_rule_name -> check_context_linetype s; TExists
242 | "forall" when in_rule_name -> check_context_linetype s; TForall
b1b2de81
C
243 | "script" when in_rule_name -> check_context_linetype s; TScript
244 | "initialize" when in_rule_name -> check_context_linetype s; TInitialize
245 | "finalize" when in_rule_name -> check_context_linetype s; TFinalize
34e49164
C
246
247 | "char" -> Tchar linetype
248 | "short" -> Tshort linetype
249 | "int" -> Tint linetype
250 | "double" -> Tdouble linetype
251 | "float" -> Tfloat linetype
252 | "long" -> Tlong linetype
253 | "void" -> Tvoid linetype
978fd7e5
C
254 (* in_meta is only for the first keyword; drop it now to allow any type
255 name *)
256 | "struct" -> Data.saw_struct := true; Tstruct linetype
257 | "union" -> Data.saw_struct := true; Tunion linetype
258 | "enum" -> Data.saw_struct := true; Tenum linetype
34e49164
C
259 | "unsigned" -> Tunsigned linetype
260 | "signed" -> Tsigned linetype
faf9a90c 261
34e49164
C
262 | "auto" -> Tauto linetype
263 | "register" -> Tregister linetype
264 | "extern" -> Textern linetype
265 | "static" -> Tstatic linetype
266 | "inline" -> Tinline linetype
267 | "typedef" -> Ttypedef linetype
268
269 | "const" -> Tconst linetype
270 | "volatile" -> Tvolatile linetype
271
272 | "if" -> TIf linetype
273 | "else" -> TElse linetype
274 | "while" -> TWhile linetype
275 | "do" -> TDo linetype
276 | "for" -> TFor linetype
277 | "switch" -> TSwitch linetype
278 | "case" -> TCase linetype
279 | "default" -> TDefault linetype
280 | "return" -> TReturn linetype
281 | "break" -> TBreak linetype
282 | "continue" -> TContinue linetype
283 | "goto" -> TGoto linetype
284
285 | "sizeof" -> TSizeof linetype
286
c3e37e97
C
287 | "Expression" -> TIsoExpression
288 | "ArgExpression" -> TIsoArgExpression
289 | "TestExpression" -> TIsoTestExpression
290 | "ToTestExpression" -> TIsoToTestExpression
291 | "Statement" -> TIsoStatement
292 | "Declaration" -> TIsoDeclaration
293 | "Type" -> TIsoType
294 | "TopLevel" -> TIsoTopLevel
34e49164
C
295
296 | s -> check_var s linetype
297
298let mkassign op lexbuf =
299 TAssign (Ast.OpAssign op, (get_current_line_type lexbuf))
300
301let init _ =
302 line := 1;
303 logical_line := 0;
304 prev_plus := false;
305 line_start := 0;
306 current_line_started := false;
c3e37e97 307 current_line_type := (D.CONTEXT,0,0);
34e49164
C
308 col_zero := true;
309 pm := UNKNOWN;
310 Data.in_rule_name := false;
311 Data.in_meta := false;
312 Data.in_prolog := false;
978fd7e5 313 Data.saw_struct := false;
34e49164
C
314 Data.inheritable_positions := [];
315 Hashtbl.clear all_metavariables;
316 Hashtbl.clear Data.all_metadecls;
317 Hashtbl.clear metavariables;
318 Hashtbl.clear type_names;
319 Hashtbl.clear rule_names;
708f4980
C
320 Hashtbl.clear iterator_names;
321 Hashtbl.clear declarer_names;
34e49164
C
322 let get_name (_,x) = x in
323 Data.add_id_meta :=
324 (fun name constraints pure ->
325 let fn clt = TMetaId(name,constraints,pure,clt) in
326 Hashtbl.replace metavariables (get_name name) fn);
ae4735db
C
327 Data.add_virt_id_meta_found :=
328 (fun name vl ->
329 let fn clt = TIdent(vl,clt) in
330 Hashtbl.replace metavariables name fn);
331 Data.add_virt_id_meta_not_found :=
332 (fun name pure ->
333 let fn clt = TMetaId(name,Ast.IdNoConstraint,pure,clt) in
334 Hashtbl.replace metavariables (get_name name) fn);
b1b2de81
C
335 Data.add_fresh_id_meta :=
336 (fun name ->
951c7801 337 let fn clt = TMetaId(name,Ast.IdNoConstraint,Ast0.Impure,clt) in
b1b2de81 338 Hashtbl.replace metavariables (get_name name) fn);
34e49164
C
339 Data.add_type_meta :=
340 (fun name pure ->
341 let fn clt = TMetaType(name,pure,clt) in
342 Hashtbl.replace metavariables (get_name name) fn);
113803cf
C
343 Data.add_init_meta :=
344 (fun name pure ->
345 let fn clt = TMetaInit(name,pure,clt) in
346 Hashtbl.replace metavariables (get_name name) fn);
34e49164
C
347 Data.add_param_meta :=
348 (function name -> function pure ->
349 let fn clt = TMetaParam(name,pure,clt) in
350 Hashtbl.replace metavariables (get_name name) fn);
351 Data.add_paramlist_meta :=
352 (function name -> function lenname -> function pure ->
353 let fn clt = TMetaParamList(name,lenname,pure,clt) in
354 Hashtbl.replace metavariables (get_name name) fn);
355 Data.add_const_meta :=
356 (fun tyopt name constraints pure ->
357 let fn clt = TMetaConst(name,constraints,pure,tyopt,clt) in
358 Hashtbl.replace metavariables (get_name name) fn);
359 Data.add_err_meta :=
360 (fun name constraints pure ->
361 let fn clt = TMetaErr(name,constraints,pure,clt) in
362 Hashtbl.replace metavariables (get_name name) fn);
363 Data.add_exp_meta :=
364 (fun tyopt name constraints pure ->
365 let fn clt = TMetaExp(name,constraints,pure,tyopt,clt) in
366 Hashtbl.replace metavariables (get_name name) fn);
367 Data.add_idexp_meta :=
368 (fun tyopt name constraints pure ->
369 let fn clt = TMetaIdExp(name,constraints,pure,tyopt,clt) in
370 Hashtbl.replace metavariables (get_name name) fn);
371 Data.add_local_idexp_meta :=
372 (fun tyopt name constraints pure ->
373 let fn clt = TMetaLocalIdExp(name,constraints,pure,tyopt,clt) in
374 Hashtbl.replace metavariables (get_name name) fn);
375 Data.add_explist_meta :=
376 (function name -> function lenname -> function pure ->
377 let fn clt = TMetaExpList(name,lenname,pure,clt) in
378 Hashtbl.replace metavariables (get_name name) fn);
379 Data.add_stm_meta :=
380 (function name -> function pure ->
381 let fn clt = TMetaStm(name,pure,clt) in
382 Hashtbl.replace metavariables (get_name name) fn);
383 Data.add_stmlist_meta :=
384 (function name -> function pure ->
385 let fn clt = TMetaStmList(name,pure,clt) in
386 Hashtbl.replace metavariables (get_name name) fn);
387 Data.add_func_meta :=
388 (fun name constraints pure ->
389 let fn clt = TMetaFunc(name,constraints,pure,clt) in
390 Hashtbl.replace metavariables (get_name name) fn);
391 Data.add_local_func_meta :=
392 (fun name constraints pure ->
393 let fn clt = TMetaLocalFunc(name,constraints,pure,clt) in
394 Hashtbl.replace metavariables (get_name name) fn);
395 Data.add_iterator_meta :=
396 (fun name constraints pure ->
397 let fn clt = TMetaIterator(name,constraints,pure,clt) in
398 Hashtbl.replace metavariables (get_name name) fn);
399 Data.add_declarer_meta :=
400 (fun name constraints pure ->
401 let fn clt = TMetaDeclarer(name,constraints,pure,clt) in
402 Hashtbl.replace metavariables (get_name name) fn);
403 Data.add_pos_meta :=
404 (fun name constraints any ->
405 let fn ((d,ln,_,_,_,_,_,_) as clt) =
406 (if d = Data.PLUS
407 then
408 failwith
409 (Printf.sprintf "%d: positions only allowed in minus code" ln));
410 TMetaPos(name,constraints,any,clt) in
411 Hashtbl.replace metavariables (get_name name) fn);
412 Data.add_type_name :=
413 (function name ->
414 let fn clt = TTypeId(name,clt) in
415 Hashtbl.replace type_names name fn);
416 Data.add_declarer_name :=
417 (function name ->
418 let fn clt = TDeclarerId(name,clt) in
419 Hashtbl.replace declarer_names name fn);
420 Data.add_iterator_name :=
421 (function name ->
422 let fn clt = TIteratorId(name,clt) in
423 Hashtbl.replace iterator_names name fn);
424 Data.init_rule := (function _ -> Hashtbl.clear metavariables);
425 Data.install_bindings :=
426 (function parent ->
427 List.iter (function (name,fn) -> Hashtbl.add metavariables name fn)
428 (Hashtbl.find all_metavariables parent))
429
430let drop_spaces s =
431 let len = String.length s in
432 let rec loop n =
433 if n = len
434 then n
435 else
436 if List.mem (String.get s n) [' ';'\t']
437 then loop (n+1)
438 else n in
439 let start = loop 0 in
440 String.sub s start (len - start)
441}
442
443(* ---------------------------------------------------------------------- *)
444(* tokens *)
445
446let letter = ['A'-'Z' 'a'-'z' '_']
447let digit = ['0'-'9']
448
449let dec = ['0'-'9']
450let oct = ['0'-'7']
451let hex = ['0'-'9' 'a'-'f' 'A'-'F']
452
453let decimal = ('0' | (['1'-'9'] dec*))
454let octal = ['0'] oct+
faf9a90c 455let hexa = ("0x" |"0X") hex+
34e49164
C
456
457let pent = dec+
458let pfract = dec+
459let sign = ['-' '+']
460let exp = ['e''E'] sign? dec+
461let real = pent exp | ((pent? '.' pfract | pent '.' pfract? ) exp?)
462
463
464rule token = parse
c3e37e97
C
465 | [' ' '\t']* ['\n' '\r' '\011' '\012']
466 { let cls = !current_line_started in
467
468 if not cls
469 then
470 begin
471 match !current_line_type with
472 (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
473 let info = get_current_line_type lexbuf in
474 reset_line lexbuf;
475 TPragma (Ast.Noindent "", info)
476 | _ -> reset_line lexbuf; token lexbuf
477 end
478 else (reset_line lexbuf; token lexbuf) }
479
480 | [' ' '\t' ]+ { start_line false; token lexbuf }
481
482 | "//" [^ '\n']* {
483 match !current_line_type with
484 (D.PLUS,_,_) | (D.PLUSPLUS,_,_) ->
485 TPragma (Ast.Indent (tok lexbuf), get_current_line_type lexbuf)
486 | _ -> start_line false; token lexbuf }
34e49164
C
487
488 | "@@" { start_line true; TArobArob }
489 | "@" { pass_zero();
490 if !Data.in_rule_name or not !current_line_started
491 then (start_line true; TArob)
492 else (check_minus_context_linetype "@"; TPArob) }
493
951c7801
C
494 | "~=" { start_line true; TTildeEq (get_current_line_type lexbuf) }
495 | "!~=" { start_line true; TTildeExclEq (get_current_line_type lexbuf) }
34e49164
C
496 | "WHEN" | "when"
497 { start_line true; check_minus_context_linetype (tok lexbuf);
498 TWhen (get_current_line_type lexbuf) }
499
500 | "..."
501 { start_line true; check_minus_context_linetype (tok lexbuf);
502 TEllipsis (get_current_line_type lexbuf) }
503(*
504 | "ooo"
505 { start_line true; check_minus_context_linetype (tok lexbuf);
506 TCircles (get_current_line_type lexbuf) }
507
508 | "***"
509 { start_line true; check_minus_context_linetype (tok lexbuf);
510 TStars (get_current_line_type lexbuf) }
511*)
512 | "<..." { start_line true; check_context_linetype (tok lexbuf);
513 TOEllipsis (get_current_line_type lexbuf) }
514 | "...>" { start_line true; check_context_linetype (tok lexbuf);
515 TCEllipsis (get_current_line_type lexbuf) }
516 | "<+..." { start_line true; check_context_linetype (tok lexbuf);
517 TPOEllipsis (get_current_line_type lexbuf) }
518 | "...+>" { start_line true; check_context_linetype (tok lexbuf);
519 TPCEllipsis (get_current_line_type lexbuf) }
520(*
521 | "<ooo" { start_line true; check_context_linetype (tok lexbuf);
522 TOCircles (get_current_line_type lexbuf) }
523 | "ooo>" { start_line true; check_context_linetype (tok lexbuf);
524 TCCircles (get_current_line_type lexbuf) }
525
526 | "<***" { start_line true; check_context_linetype (tok lexbuf);
527 TOStars (get_current_line_type lexbuf) }
528 | "***>" { start_line true; check_context_linetype (tok lexbuf);
529 TCStars (get_current_line_type lexbuf) }
530*)
531 | "-" { pass_zero();
532 if !current_line_started
533 then (start_line true; TMinus (get_current_line_type lexbuf))
534 else (patch_or_match PATCH;
535 add_current_line_type D.MINUS; token lexbuf) }
536 | "+" { pass_zero();
537 if !current_line_started
538 then (start_line true; TPlus (get_current_line_type lexbuf))
539 else if !Data.in_meta
540 then TPlus0
541 else (patch_or_match PATCH;
542 add_current_line_type D.PLUS; token lexbuf) }
543 | "?" { pass_zero();
544 if !current_line_started
545 then (start_line true; TWhy (get_current_line_type lexbuf))
546 else if !Data.in_meta
547 then TWhy0
548 else (add_current_line_type D.OPT; token lexbuf) }
549 | "!" { pass_zero();
550 if !current_line_started
551 then (start_line true; TBang (get_current_line_type lexbuf))
552 else if !Data.in_meta
553 then TBang0
554 else (add_current_line_type D.UNIQUE; token lexbuf) }
555 | "(" { if not !col_zero
556 then (start_line true; TOPar (get_current_line_type lexbuf))
557 else
558 (start_line true; check_context_linetype (tok lexbuf);
559 TOPar0 (get_current_line_type lexbuf))}
560 | "\\(" { start_line true; TOPar0 (get_current_line_type lexbuf) }
561 | "|" { if not (!col_zero)
562 then (start_line true; TOr(get_current_line_type lexbuf))
563 else (start_line true;
564 check_context_linetype (tok lexbuf);
565 TMid0 (get_current_line_type lexbuf))}
566 | "\\|" { start_line true; TMid0 (get_current_line_type lexbuf) }
567 | ")" { if not !col_zero
568 then (start_line true; TCPar (get_current_line_type lexbuf))
569 else
570 (start_line true; check_context_linetype (tok lexbuf);
571 TCPar0 (get_current_line_type lexbuf))}
572 | "\\)" { start_line true; TCPar0 (get_current_line_type lexbuf) }
573
574 | '[' { start_line true; TOCro (get_current_line_type lexbuf) }
575 | ']' { start_line true; TCCro (get_current_line_type lexbuf) }
576 | '{' { start_line true; TOBrace (get_current_line_type lexbuf) }
577 | '}' { start_line true; TCBrace (get_current_line_type lexbuf) }
578
579 | "->" { start_line true; TPtrOp (get_current_line_type lexbuf) }
580 | '.' { start_line true; TDot (get_current_line_type lexbuf) }
581 | ',' { start_line true; TComma (get_current_line_type lexbuf) }
582 | ";" { start_line true;
583 if !Data.in_meta
584 then TMPtVirg (* works better with tokens_all *)
585 else TPtVirg (get_current_line_type lexbuf) }
586
faf9a90c 587
34e49164
C
588 | '*' { pass_zero();
589 if !current_line_started
590 then
591 (start_line true; TMul (get_current_line_type lexbuf))
592 else
593 (patch_or_match MATCH;
594 add_current_line_type D.MINUS; token lexbuf) }
595 | '/' { start_line true;
faf9a90c 596 TDmOp (Ast.Div,get_current_line_type lexbuf) }
34e49164 597 | '%' { start_line true;
faf9a90c
C
598 TDmOp (Ast.Mod,get_current_line_type lexbuf) }
599 | '~' { start_line true; TTilde (get_current_line_type lexbuf) }
600
951c7801
C
601 | "++" { pass_zero();
602 if !current_line_started
603 then
604 (start_line true; TInc (get_current_line_type lexbuf))
605 else (patch_or_match PATCH;
606 add_current_line_type D.PLUSPLUS; token lexbuf) }
34e49164 607 | "--" { start_line true; TDec (get_current_line_type lexbuf) }
faf9a90c
C
608
609 | "=" { start_line true; TEq (get_current_line_type lexbuf) }
610
34e49164
C
611 | "-=" { start_line true; mkassign Ast.Minus lexbuf }
612 | "+=" { start_line true; mkassign Ast.Plus lexbuf }
faf9a90c 613
34e49164
C
614 | "*=" { start_line true; mkassign Ast.Mul lexbuf }
615 | "/=" { start_line true; mkassign Ast.Div lexbuf }
616 | "%=" { start_line true; mkassign Ast.Mod lexbuf }
faf9a90c 617
34e49164
C
618 | "&=" { start_line true; mkassign Ast.And lexbuf }
619 | "|=" { start_line true; mkassign Ast.Or lexbuf }
620 | "^=" { start_line true; mkassign Ast.Xor lexbuf }
faf9a90c 621
34e49164
C
622 | "<<=" { start_line true; mkassign Ast.DecLeft lexbuf }
623 | ">>=" { start_line true; mkassign Ast.DecRight lexbuf }
624
625 | ":" { start_line true; TDotDot (get_current_line_type lexbuf) }
faf9a90c 626
951c7801
C
627 | "==" { start_line true; TEqEq (get_current_line_type lexbuf) }
628 | "!=" { start_line true; TNotEq (get_current_line_type lexbuf) }
34e49164
C
629 | ">=" { start_line true;
630 TLogOp(Ast.SupEq,get_current_line_type lexbuf) }
631 | "<=" { start_line true;
632 TLogOp(Ast.InfEq,get_current_line_type lexbuf) }
633 | "<" { start_line true;
faf9a90c 634 TLogOp(Ast.Inf,get_current_line_type lexbuf) }
34e49164
C
635 | ">" { start_line true;
636 TLogOp(Ast.Sup,get_current_line_type lexbuf) }
faf9a90c
C
637
638 | "&&" { start_line true; TAndLog (get_current_line_type lexbuf) }
34e49164 639 | "||" { start_line true; TOrLog (get_current_line_type lexbuf) }
faf9a90c 640
34e49164
C
641 | ">>" { start_line true;
642 TShOp(Ast.DecRight,get_current_line_type lexbuf) }
643 | "<<" { start_line true;
644 TShOp(Ast.DecLeft,get_current_line_type lexbuf) }
faf9a90c 645
34e49164
C
646 | "&" { start_line true; TAnd (get_current_line_type lexbuf) }
647 | "^" { start_line true; TXor(get_current_line_type lexbuf) }
648
978fd7e5 649 | "##" { start_line true; TCppConcatOp }
708f4980 650 | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
faf9a90c 651 ( (letter (letter |digit)*) as ident)
34e49164
C
652 { start_line true;
653 let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
654 get_current_line_type lexbuf in
708f4980 655 let off = String.length def in
34e49164
C
656 (* -1 in the code below because the ident is not at the line start *)
657 TDefine
658 (lt,
659 check_var ident
708f4980
C
660 (arity,line,lline,offset+off,col+off,[],[],Ast0.NoMetaPos)) }
661 | (( ("#" [' ' '\t']* "define" [' ' '\t']+)) as def)
faf9a90c 662 ( (letter (letter | digit)*) as ident)
34e49164
C
663 '('
664 { start_line true;
665 let (arity,line,lline,offset,col,strbef,straft,pos) as lt =
666 get_current_line_type lexbuf in
708f4980 667 let off = String.length def in
34e49164
C
668 TDefineParam
669 (lt,
670 check_var ident
671 (* why pos here but not above? *)
708f4980
C
672 (arity,line,lline,offset+off,col+off,strbef,straft,pos),
673 offset + off + (String.length ident),
674 col + off + (String.length ident)) }
34e49164
C
675 | "#" [' ' '\t']* "include" [' ' '\t']* '"' [^ '"']+ '"'
676 { TIncludeL
677 (let str = tok lexbuf in
678 let start = String.index str '"' in
679 let finish = String.rindex str '"' in
680 start_line true;
681 (process_include start finish str,get_current_line_type lexbuf)) }
682 | "#" [' ' '\t']* "include" [' ' '\t']* '<' [^ '>']+ '>'
683 { TIncludeNL
684 (let str = tok lexbuf in
685 let start = String.index str '<' in
686 let finish = String.rindex str '>' in
687 start_line true;
688 (process_include start finish str,get_current_line_type lexbuf)) }
689 | "#" [' ' '\t']* "if" [^'\n']*
690 | "#" [' ' '\t']* "ifdef" [^'\n']*
691 | "#" [' ' '\t']* "ifndef" [^'\n']*
692 | "#" [' ' '\t']* "else" [^'\n']*
693 | "#" [' ' '\t']* "elif" [^'\n']*
694 | "#" [' ' '\t']* "endif" [^'\n']*
695 | "#" [' ' '\t']* "error" [^'\n']*
696 { start_line true; check_plus_linetype (tok lexbuf);
c3e37e97 697 TPragma (Ast.Noindent(tok lexbuf), get_current_line_type lexbuf) }
0708f913
C
698 | "/*"
699 { start_line true; check_plus_linetype (tok lexbuf);
700 (* second argument to TPragma is not quite right, because
aa721442 701 it represents only the first token of the comment, but that
0708f913 702 should be good enough *)
c3e37e97
C
703 TPragma (Ast.Indent("/*"^(comment lexbuf)),
704 get_current_line_type lexbuf) }
34e49164
C
705 | "---" [^'\n']*
706 { (if !current_line_started
707 then lexerr "--- must be at the beginning of the line" "");
708 start_line true;
709 TMinusFile
710 (let str = tok lexbuf in
711 (drop_spaces(String.sub str 3 (String.length str - 3)),
712 (get_current_line_type lexbuf))) }
713 | "+++" [^'\n']*
714 { (if !current_line_started
715 then lexerr "+++ must be at the beginning of the line" "");
716 start_line true;
717 TPlusFile
718 (let str = tok lexbuf in
719 (drop_spaces(String.sub str 3 (String.length str - 3)),
720 (get_current_line_type lexbuf))) }
721
722 | letter (letter | digit)*
faf9a90c 723 { start_line true; id_tokens lexbuf }
34e49164
C
724
725 | "'" { start_line true;
726 TChar(char lexbuf,get_current_line_type lexbuf) }
727 | '"' { start_line true;
728 TString(string lexbuf,(get_current_line_type lexbuf)) }
729 | (real as x) { start_line true;
730 TFloat(x,(get_current_line_type lexbuf)) }
faf9a90c
C
731 | ((( decimal | hexa | octal)
732 ( ['u' 'U']
733 | ['l' 'L']
34e49164
C
734 | (['l' 'L'] ['u' 'U'])
735 | (['u' 'U'] ['l' 'L'])
736 | (['u' 'U'] ['l' 'L'] ['l' 'L'])
737 | (['l' 'L'] ['l' 'L'])
738 )?
739 ) as x) { start_line true; TInt(x,(get_current_line_type lexbuf)) }
740
741 | "<=>" { TIso }
742 | "=>" { TRightIso }
743
744 | eof { EOF }
745
746 | _ { lexerr "unrecognised symbol, in token rule: " (tok lexbuf) }
747
748
749and char = parse
750 | (_ as x) "'" { String.make 1 x }
751 | (("\\" (oct | oct oct | oct oct oct)) as x "'") { x }
752 | (("\\x" (hex | hex hex)) as x "'") { x }
753 | (("\\" (_ as v)) as x "'")
754 { (match v with
755 | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> ()
756 | 'r' -> () | 'f' -> () | 'a' -> ()
757 | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
758 | 'e' -> ()
759 | _ -> lexerr "unrecognised symbol: " (tok lexbuf)
760 );
761 x
faf9a90c 762 }
34e49164
C
763 | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
764
765and string = parse
766 | '"' { "" }
767 | (_ as x) { Common.string_of_char x ^ string lexbuf }
768 | ("\\" (oct | oct oct | oct oct oct)) as x { x ^ string lexbuf }
769 | ("\\x" (hex | hex hex)) as x { x ^ string lexbuf }
faf9a90c
C
770 | ("\\" (_ as v)) as x
771 {
34e49164 772 (match v with
9f8e26f4
C
773 | 'n' -> () | 't' -> () | 'v' -> () | 'b' -> () | 'r' -> ()
774 | 'f' -> () | 'a' -> ()
775 | '\\' -> () | '?' -> () | '\'' -> () | '"' -> ()
776 | 'e' -> ()
777 | '\n' -> ()
778 | '(' -> () | '|' -> () | ')' -> ()
779 | _ -> lexerr "unrecognised symbol:" (tok lexbuf)
34e49164
C
780 );
781 x ^ string lexbuf
782 }
783 | _ { lexerr "unrecognised symbol: " (tok lexbuf) }
0708f913
C
784
785and comment = parse
aa721442 786 | "*/" { let s = tok lexbuf in check_comment s; start_line true; s }
0708f913 787 | ['\n' '\r' '\011' '\012']
aa721442
C
788 { let s = tok lexbuf in
789 (* even blank line should have a + *)
790 check_comment s;
791 reset_line lexbuf; s ^ comment lexbuf }
0708f913
C
792 | "+" { pass_zero();
793 if !current_line_started
794 then (start_line true; let s = tok lexbuf in s^(comment lexbuf))
aa721442 795 else (start_line true; comment lexbuf) }
0708f913 796 (* noteopti: *)
aa721442
C
797 | [^ '*']
798 { let s = tok lexbuf in
799 check_comment s; start_line true; s ^ comment lexbuf }
800 | [ '*']
801 { let s = tok lexbuf in
802 check_comment s; start_line true; s ^ comment lexbuf }
951c7801 803 | _
0708f913
C
804 { start_line true; let s = tok lexbuf in
805 Common.pr2 ("LEXER: unrecognised symbol in comment:"^s);
806 s ^ comment lexbuf
807 }
808