Release coccinelle-0.1.2
[bpt/coccinelle.git] / commons / parser_combinators.mli
CommitLineData
34e49164
C
1(*****************************************************************************)
2(* src: Jon Harrop.
3 *
4 * "Certain applications are extremely well suited to functional
5 * programming and parsing is one of them. Specifically, the ability to
6 * write functional combinators that allow parsers for everything from
7 * integers up to symbolic expressions to be composed is more general
8 * and provides more opportunity for code reuse than the use of
9 * conventional parser generators such as ocamllex and ocamlyacc. This
10 * article explains how parser combinators may be designed and
11 * implemented in OCaml, using the standard example of a calculator."
12 *
13 * pad: a few bugfixes. I also put more restrictive and descriptive types.
14 *
15 *)
16
17(*****************************************************************************)
18
19(* A generic parser takes a list of stuff (either chars for a lexical
20 * parser or tokens for a grammar parser) and returns something and the
21 * remaining list of stuff. *)
22type ('a, 'b) genp = 'a list -> 'b * 'a list
23val val_of_parser : 'b * 'a list -> 'b (* takes a parser result (value, remaining input) and, per its type, gives back the value part *)
24
25(* lexer = parser of char list *)
26(* type 'a lexer = (char, 'a) genp *)
27
28(* grammar = parser of tokens *)
29(* type 'a pparser = (token, 'a) genp *)
30
31
32val ( ||| ) : ('a, 'b) genp -> ('a, 'b) genp -> ('a, 'b) genp
33(* Combines two parsers of the same type into one; presumably an ordered choice -- confirm in the .ml.  More general type: ('a -> 'b) -> ('a -> 'b) -> 'a -> 'b *)
34val ( +++ ) : ('a, 'b) genp -> ('a, 'c) genp -> ('a, 'b * 'c) genp
35(* Pairs the values of two parsers over the same input type; presumably runs them in sequence -- confirm in the .ml.  More general type: ('a -> 'b * 'c) -> ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e *)
36
37val many : ('a, 'b) genp -> ('a, 'b list) genp
38(* Lifts a parser of 'b to a parser of 'b list; presumably by repeated application (zero-or-more vs one-or-more: TODO confirm in the .ml).  More general type: ('a -> 'b * 'a) -> 'a -> 'b list * 'a *)
39
40val ( >| ) : ('a, 'b) genp -> ('b -> 'c) -> ('a, 'c) genp
41(* Turns a parser of 'b into a parser of 'c using the given function on the parsed value.  More general type: ('a -> 'b * 'c) -> ('b -> 'd) -> 'a -> 'd * 'c *)
42
43(* was called 'some', but confusing; presumably accepts one element satisfying the predicate -- confirm in the .ml *)
44val pred : ('a -> bool) -> ('a, 'a) genp
45(* expanded type: ('a -> bool) -> 'a list -> 'a * 'a list *)
46
47val a : 'a -> ('a, 'a) genp
48(* presumably accepts one element equal to the given one -- confirm in the .ml.  Expanded type: 'a -> 'a list -> 'a * 'a list *)
49
50val several : ('a -> bool) -> ('a, 'a list) genp
51(* presumably collects a run of elements satisfying the predicate -- confirm in the .ml.  Expanded type: ('a -> bool) -> 'a list -> 'a list * 'a list *)
52
53
54module Abstr : sig
55 type t (* abstract: this signature exposes no constructors for t *)
56 val x : t (* the only value of type t exported here *)
57end
58
59val fin : ('a, Abstr.t) genp
60(* A parser whose value is an Abstr.t; the name suggests an end-of-input check -- TODO confirm in the .ml.  More general type: 'a list -> Abstr.t * 'b list *)
61
62
63val digit : char -> bool (* character-class predicates; the exact character sets are defined in the .ml *)
64val alpha : char -> bool
65val symbol : char -> bool
66val alphanum : char -> bool
67val space : char -> bool
68
69val alphanum_underscore : char -> bool (* by name, variants of alphanum accepting extra characters -- confirm in the .ml *)
70val alphanum_minus : char -> bool
71val alphanum_under_minus : char -> bool
72
73val collect : char * char list -> string (* builds a string from a (char, char list) pair; presumably the char followed by the list, in order -- confirm *)
74val list_of_string : string -> char list (* converts a string to a list of chars, the input form taken by the lexers of this interface *)
75
76
77(*****************************************************************************)
78type token = (* lexical tokens; every constructor carries its text as a string *)
79 | IDENT of string
80 | KWD of string
81 | INT of string (* the payload is a string, not an int *)
82 | SYM of string
83 | STR of string
84
85val string_of_token : token -> string (* maps a token to a string; the exact format is defined in the .ml *)
86
87type lexer = (char, token) genp (* a lexer reads a char list and yields one token plus the remaining chars *)
88
89val rawident : lexer
90(* expanded type: char list -> token * char list; which token constructor each raw lexer builds is not visible here *)
91val rawnumber : lexer
92(* expanded type: char list -> token * char list *)
93
94val rawsymbol : lexer
95
96(* not space, not digit *)
97val rawkeyword : lexer
98(* expanded type: char list -> token * char list *)
99
100val rawstring : lexer
101
102val lex_gen : lexer -> string -> token list (* given a lexer and a string, returns a token list; presumably applies the lexer repeatedly over the whole string -- confirm in the .ml *)
103
104(*****************************************************************************)
105val token : lexer
106(* lexer for a single token; presumably a combination of the raw lexers -- confirm in the .ml.  Expanded type: char list -> token * char list *)
107val tokens : (char, token list) genp
108(* lexer yielding a list of tokens.  Expanded type: char list -> token list * char list *)
109
110val alltokens : (char, token list) genp
111(* same declared type as tokens; by name presumably requires the whole input to be consumed -- confirm.  More general type: char list -> token list * 'a list *)
112
113val lex : string -> token list (* string-to-tokens entry point; same result type as lex_gen applied to a lexer *)
114
115
116(*****************************************************************************)
117(* cannot use parser as the type name: it's a reserved word *)
118type 'a pparser = (token, 'a) genp
119
120val ident : string pparser
121(* expanded type: token list -> string * token list; presumably yields the payload of an IDENT token -- confirm in the .ml *)
122val int : string pparser
123(* expanded type: token list -> string * token list; the value is a string, matching INT of string *)
124val string : string pparser (* same type as ident and int; presumably yields the payload of a STR token -- confirm *)
125
126type expr = (* syntax tree for the example expression language *)
127 | Int of int
128 | Var of string
129 | Add of expr * expr
130 | Mul of expr * expr
131
132val atom : expr pparser
133(* expanded type: token list -> expr * token list *)
134val factor : expr pparser
135(* expanded type: token list -> expr * token list *)
136val term : expr pparser
137(* expanded type: token list -> expr * token list *)
138val expr : expr pparser
139(* presumably the top-level expression parser built from atom, factor and term -- confirm in the .ml.  More general type: token list -> expr * 'a list *)
140
141val parse : 'a pparser -> string -> 'a
142(* Runs a token parser on a string and returns its value; presumably lexes the string first -- confirm.  More general type: (token list -> 'a * 'b) -> string -> 'a *)
143
144
145(*****************************************************************************)
146
147module Infix : sig (* repeats the signatures of the three top-level operators; presumably so clients can open just this module -- confirm *)
148 val ( ||| ) : ('a, 'b) genp -> ('a, 'b) genp -> ('a, 'b) genp
149 (* more general type: ('a -> 'b) -> ('a -> 'b) -> 'a -> 'b *)
150 val ( +++ ) : ('a, 'b) genp -> ('a, 'c) genp -> ('a, 'b * 'c) genp
151 (* more general type: ('a -> 'b * 'c) -> ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e *)
152 val ( >| ) : ('a, 'b) genp -> ('b -> 'c) -> ('a, 'c) genp
153 (* more general type: ('a -> 'b * 'c) -> ('b -> 'd) -> 'a -> 'd * 'c *)
154end