Release coccinelle-0.1.2
[bpt/coccinelle.git] / commons / parser_combinators.mli
1 (*****************************************************************************)
2 (* src: Jon Harrop.
3 *
4 * "Certain applications are extremely well suited to functional
5 * programming and parsing is one of them. Specifically, the ability to
6 * write functional combinators that allow parsers for everything from
7 * integers up to symbolic expressions to be composed is more general
8 * and provides more opportunity for code reuse than the use of
9 * conventional parser generators such as ocamllex and ocamlyacc. This
10 * article explains how parser combinators may be designed and
11 * implemented in OCaml, using the standard example of a calculator."
12 *
13 * pad: a few bugfixes. I also put more restrictive and descriptive types.
14 *
15 *)
16
17 (*****************************************************************************)
18
19 (* A generic parser takes a list of input items (chars for a lexical
20 * parser, tokens for a grammar parser) and returns a result paired with
21 * the remaining list of items. *)
22 type ('a, 'b) genp = 'a list -> 'b * 'a list
23 val val_of_parser : 'b * 'a list -> 'b (* takes the output pair of a genp; per its type it yields the result part and drops the remaining input *)
24
25 (* lexer = parser of char list *)
26 (* type 'a lexer = (char, 'a) genp *)
27
28 (* grammar = parser of tokens *)
29 (* type 'a pparser = (token, 'a) genp *)
30
31 (* Core combinators on genp. The comment under each val looks like the more general type of the underlying function before it was restricted to genp (see the header note) -- TODO confirm against the .ml. *)
32 val ( ||| ) : ('a, 'b) genp -> ('a, 'b) genp -> ('a, 'b) genp
33 (* ('a -> 'b) -> ('a -> 'b) -> 'a -> 'b ; combines two parsers of the same type into one, presumably as alternatives -- confirm in the .ml *)
34 val ( +++ ) : ('a, 'b) genp -> ('a, 'c) genp -> ('a, 'b * 'c) genp
35 (* ('a -> 'b * 'c) -> ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e ; the result pairs the values of both parsers, presumably run in sequence -- confirm in the .ml *)
36
37 val many : ('a, 'b) genp -> ('a, 'b list) genp
38 (* ('a -> 'b * 'a) -> 'a -> 'b list * 'a ; yields a list of results of the given parser, presumably by repeating it -- confirm in the .ml *)
39
40 val ( >| ) : ('a, 'b) genp -> ('b -> 'c) -> ('a, 'c) genp
41 (* ('a -> 'b * 'c) -> ('b -> 'd) -> 'a -> 'd * 'c ; turns a parser of 'b into a parser of 'c using the given function on the result *)
42
43 (* was called 'some', but confusing *)
44 val pred : ('a -> bool) -> ('a, 'a) genp
45 (* ('a -> bool) -> 'a list -> 'a * 'a list ; builds a one-item parser from a predicate, presumably accepting the head item when the predicate holds -- confirm in the .ml *)
46
47 val a : 'a -> ('a, 'a) genp
48 (* 'a -> 'a list -> 'a * 'a list ; builds a one-item parser from a given item, presumably matching an equal head item -- confirm in the .ml *)
49
50 val several : ('a -> bool) -> ('a, 'a list) genp
51 (* ('a -> bool) -> 'a list -> 'a list * 'a list ; like pred but the result is a list of items, presumably a run of items satisfying the predicate -- confirm in the .ml *)
52
53 (* Abstr exposes an abstract type t and a single value x of that type; nothing else about t is visible to clients. *)
54 module Abstr : sig
55 type t
56 val x : t
57 end
58
59 val fin : ('a, Abstr.t) genp
60 (* 'a list -> Abstr.t * 'b list ; note the declared type above returns 'a list, not 'b list. Presumably the end-of-input parser -- TODO confirm in the .ml *)
61
62 (* Character predicates. Their type char -> bool fits the predicate argument of pred and several on char input; the exact character sets are defined in the .ml. *)
63 val digit : char -> bool
64 val alpha : char -> bool
65 val symbol : char -> bool
66 val alphanum : char -> bool
67 val space : char -> bool
68
69 val alphanum_underscore : char -> bool
70 val alphanum_minus : char -> bool
71 val alphanum_under_minus : char -> bool
72
73 val collect : char * char list -> string (* builds a string from a char paired with a char list; presumably head followed by the rest -- confirm in the .ml *)
74 val list_of_string : string -> char list (* converts a string to a char list, the input form expected by a genp over chars *)
75
76
77 (*****************************************************************************)
78 type token = (* lexical tokens; every constructor carries a string payload, including INT *)
79 | IDENT of string
80 | KWD of string
81 | INT of string
82 | SYM of string
83 | STR of string
84 (* string_of_token renders a token as a string; the exact format is defined in the .ml *)
85 val string_of_token : token -> string
86 (* A lexer is a genp over chars that yields one token, i.e. char list -> token * char list. *)
87 type lexer = (char, token) genp
88
89 val rawident : lexer
90 (* char list -> token * char list *)
91 val rawnumber : lexer
92 (* char list -> token * char list *)
93
94 val rawsymbol : lexer
95
96 (* not space, not digit *)
97 val rawkeyword : lexer
98 (* char list -> token * char list *)
99
100 val rawstring : lexer
101 (* lex_gen turns a string into a token list using the given lexer; presumably the lexer is applied repeatedly over the input -- confirm in the .ml *)
102 val lex_gen : lexer -> string -> token list
103
104 (*****************************************************************************)
105 val token : lexer
106 (* char list -> token * char list ; lexes a single token *)
107 val tokens : (char, token list) genp
108 (* char list -> token list * char list ; lexes a list of tokens and returns the remaining chars *)
109
110 val alltokens : (char, token list) genp
111 (* char list -> token list * 'a list ; same declared type as tokens; presumably also requires the whole input to be consumed -- TODO confirm in the .ml *)
112
113 val lex : string -> token list (* string to token list; presumably the default lexer entry point, cf. lex_gen -- confirm in the .ml *)
114
115
116 (*****************************************************************************)
117 (* can't use the name parser as it's a reserved word *)
118 type 'a pparser = (token, 'a) genp
119
120 val ident : string pparser
121 (* token list -> string * token list *)
122 val int : string pparser
123 (* token list -> string * token list ; note the result is a string, not an int *)
124 val string : string pparser (* same shape as ident and int: token list -> string * token list *)
125
126 type expr = (* expression tree for the calculator example mentioned in the header: integer literals, variables, binary addition and multiplication *)
127 | Int of int
128 | Var of string
129 | Add of expr * expr
130 | Mul of expr * expr
131
132 val atom : expr pparser
133 (* token list -> expr * token list *)
134 val factor : expr pparser
135 (* token list -> expr * token list *)
136 val term : expr pparser
137 (* token list -> expr * token list *)
138 val expr : expr pparser
139 (* token list -> expr * 'a list ; the declared type above returns token list *)
140
141 val parse : 'a pparser -> string -> 'a
142 (* (token list -> 'a * 'b) -> string -> 'a ; takes a token parser and a string and returns the parsed value; presumably lexes the string first -- confirm in the .ml *)
143
144
145 (*****************************************************************************)
146
147 module Infix : sig (* exposes the three infix combinators with the same types as the top-level ones; presumably so clients can open only the operators -- confirm with callers *)
148 val ( ||| ) : ('a, 'b) genp -> ('a, 'b) genp -> ('a, 'b) genp
149 (* ('a -> 'b) -> ('a -> 'b) -> 'a -> 'b *)
150 val ( +++ ) : ('a, 'b) genp -> ('a, 'c) genp -> ('a, 'b * 'c) genp
151 (* ('a -> 'b * 'c) -> ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e *)
152 val ( >| ) : ('a, 'b) genp -> ('b -> 'c) -> ('a, 'c) genp
153 (* ('a -> 'b * 'c) -> ('b -> 'd) -> 'a -> 'd * 'c *)
154 end