coccinelle release 0.2.5
[bpt/coccinelle.git] / menhirlib / convert.ml
1 (**************************************************************************)
2 (* *)
3 (* Menhir *)
4 (* *)
5 (* François Pottier, INRIA Rocquencourt *)
6 (* Yann Régis-Gianas, PPS, Université Paris Diderot *)
7 (* *)
8 (* Copyright 2005-2008 Institut National de Recherche en Informatique *)
9 (* et en Automatique. All rights reserved. This file is distributed *)
10 (* under the terms of the GNU Library General Public License, with the *)
11 (* special exception on linking described in file LICENSE. *)
12 (* *)
13 (**************************************************************************)
14
15 (* An ocamlyacc-style, or Menhir-style, parser requires access to
16 the lexer, which must be parameterized with a lexing buffer, and
17 to the lexing buffer itself, where it reads position information. *)
18
19 (* This traditional API is convenient when used with ocamllex, but
20 inelegant when used with other lexer generators. *)
21
22 type ('token, 'semantic_value) traditional =
23 (Lexing.lexbuf -> 'token) -> Lexing.lexbuf -> 'semantic_value
24
25 (* This revised API is independent of any lexer generator. Here, the
26 parser only requires access to the lexer, and the lexer takes no
27 parameters. The tokens returned by the lexer may contain position
28 information. *)
29
30 type ('token, 'semantic_value) revised =
31 (unit -> 'token) -> 'semantic_value
32
33 (* --------------------------------------------------------------------------- *)
34
35 (* Converting a traditional parser, produced by ocamlyacc or Menhir,
36 into a revised parser. *)
37
38 (* A token of the revised lexer is essentially a triple of a token
39 of the traditional lexer (or raw token), a start position, and
40 and end position. The three [get] functions are accessors. *)
41
42 (* We do not require the type ['token] to actually be a triple type.
43 This enables complex applications where it is a record type with
44 more three fields. It also enables simple applications where
45 positions are of no interest, so ['token] is just ['raw_token]
46 and [get_startp] and [get_endp] return dummy positions. *)
47
48 let traditional2revised
49 (get_raw_token : 'token -> 'raw_token)
50 (get_startp : 'token -> Lexing.position)
51 (get_endp : 'token -> Lexing.position)
52 (parser : ('raw_token, 'semantic_value) traditional)
53 : ('token, 'semantic_value) revised =
54
55 (* Accept a revised lexer. *)
56
57 fun (lexer : unit -> 'token) ->
58
59 (* Create a dummy lexing buffer. *)
60
61 let lexbuf : Lexing.lexbuf =
62 Lexing.from_string ""
63 in
64
65 (* Wrap the revised lexer as a traditional lexer. A traditional
66 lexer returns a raw token and updates the fields of the lexing
67 buffer with new positions, which will be read by the parser. *)
68
69 let lexer (lexbuf : Lexing.lexbuf) : 'raw_token =
70 let token : 'token = lexer() in
71 lexbuf.Lexing.lex_start_p <- get_startp token;
72 lexbuf.Lexing.lex_curr_p <- get_endp token;
73 get_raw_token token
74 in
75
76 (* Invoke the traditional parser. *)
77
78 parser lexer lexbuf
79
80 (* --------------------------------------------------------------------------- *)
81
82 (* Converting a revised parser back to a traditional parser. *)
83
84 let revised2traditional
85 (make_token : 'raw_token -> Lexing.position -> Lexing.position -> 'token)
86 (parser : ('token, 'semantic_value) revised)
87 : ('raw_token, 'semantic_value) traditional =
88
89 (* Accept a traditional lexer and a lexing buffer. *)
90
91 fun (lexer : Lexing.lexbuf -> 'raw_token) (lexbuf : Lexing.lexbuf) ->
92
93 (* Wrap the traditional lexer as a revised lexer. *)
94
95 let lexer () : 'token =
96 let token : 'raw_token = lexer lexbuf in
97 make_token token lexbuf.Lexing.lex_start_p lexbuf.Lexing.lex_curr_p
98 in
99
100 (* Invoke the revised parser. *)
101
102 parser lexer
103
104 (* --------------------------------------------------------------------------- *)
105
106 (* Simplified versions of the above, where concrete triples are used. *)
107
108 module Simplified = struct
109
110 let traditional2revised parser =
111 traditional2revised
112 (fun (token, _, _) -> token)
113 (fun (_, startp, _) -> startp)
114 (fun (_, _, endp) -> endp)
115 parser
116
117 let revised2traditional parser =
118 revised2traditional
119 (fun token startp endp -> (token, startp, endp))
120 parser
121
122 end