Commit | Line | Data |
---|---|---|
34e49164 | 1 | (*****************************************************************************) |
ae4735db | 2 | (* src: Jon Harrop. |
34e49164 C |
3 | * |
4 | * "Certain applications are extremely well suited to functional | |
5 | * programming and parsing is one of them. Specifically, the ability to | |
6 | * write functional combinators that allow parsers for everything from | |
7 | * integers up to symbolic expressions to be composed is more general | |
8 | * and provides more opportunity for code reuse than the use of | |
9 | * conventional parser generators such as ocamllex and ocamlyacc. This | |
10 | * article explains how parser combinators may be designed and | |
11 | * implemented in OCaml, using the standard example of a calculator." | |
ae4735db | 12 | * |
34e49164 | 13 | * pad: a few bugfixes. I also put more restrictive and descriptive types. |
ae4735db | 14 | * |
34e49164 C |
15 | *) |
16 | ||
17 | (*****************************************************************************) | |
18 | ||
19 | (* A generic parser takes a list of stuff (either chars for a lexical | |
20 | * parser or tokens for a grammar parser) and returns something plus the | |
21 | * remaining list of stuff. *) | |
22 | type ('a, 'b) genp = 'a list -> 'b * 'a list | |
23 | val val_of_parser : 'b * 'a list -> 'b (* takes the result pair of a parser and gives back a 'b, i.e. the value half; the remaining input is not returned *) | |
24 | ||
25 | (* lexer = parser of char list *) | |
26 | (* type 'a lexer = (char, 'a) genp *) | |
27 | ||
28 | (* grammar = parser of tokens *) | |
29 | (* type 'a pparser = (token, 'a) genp *) | |
30 | ||
31 | ||
32 | val ( ||| ) : ('a, 'b) genp -> ('a, 'b) genp -> ('a, 'b) genp | |
33 | (* Combines two parsers with identical input and result types into one parser of that same type; presumably ordered choice -- confirm in the implementation. Unaliased, more general type: ('a -> 'b) -> ('a -> 'b) -> 'a -> 'b *) | |
34 | val ( +++ ) : ('a, 'b) genp -> ('a, 'c) genp -> ('a, 'b * 'c) genp | |
35 | (* Combines a parser yielding 'b and a parser yielding 'c, both over the same input type, into a parser yielding the pair 'b * 'c. In the more general type below, the leftover of the first parser is the input of the second, i.e. sequencing. Unaliased, more general type: ('a -> 'b * 'c) -> ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e *) | |
36 | ||
37 | val many : ('a, 'b) genp -> ('a, 'b list) genp | |
38 | (* Turns a parser yielding 'b into a parser yielding a 'b list over the same input type; presumably zero-or-more repetition -- confirm in the implementation. Unaliased, more general type: ('a -> 'b * 'a) -> 'a -> 'b list * 'a *) | |
39 | ||
40 | val ( >| ) : ('a, 'b) genp -> ('b -> 'c) -> ('a, 'c) genp | |
41 | (* Given a parser yielding 'b and a function from 'b to 'c, gives a parser over the same input yielding 'c (the function post-processes the parsed value). Unaliased, more general type: ('a -> 'b * 'c) -> ('b -> 'd) -> 'a -> 'd * 'c *) | |
42 | ||
43 | (* was called 'some', but confusing *) | |
44 | val pred : ('a -> bool) -> ('a, 'a) genp | |
45 | (* From a predicate on input elements, builds a parser whose result is itself an input element; presumably it accepts the next element when the predicate holds -- confirm in the implementation. Expanded type: ('a -> bool) -> 'a list -> 'a * 'a list *) | |
46 | ||
47 | val a : 'a -> ('a, 'a) genp | |
48 | (* From one element value, builds a parser whose result is an element of the same type; presumably it matches exactly that element -- confirm in the implementation. Expanded type: 'a -> 'a list -> 'a * 'a list *) | |
49 | ||
50 | val several : ('a -> bool) -> ('a, 'a list) genp | |
51 | (* From a predicate on input elements, builds a parser whose result is a list of input elements; presumably the run of elements satisfying the predicate -- confirm in the implementation. Expanded type: ('a -> bool) -> 'a list -> 'a list * 'a list *) | |
52 | ||
53 | ||
54 | module Abstr : sig (* exposes an abstract type together with a single value of it *) | |
55 | type t (* abstract: no representation is given in this signature *) | |
56 | val x : t (* the only value of type t that this signature provides *) | |
57 | end | |
58 | ||
59 | val fin : ('a, Abstr.t) genp | |
60 | (* Parser over any element type whose result is an Abstr.t; by its name it presumably succeeds only at end of input -- confirm in the implementation. Unaliased, more general type: 'a list -> Abstr.t * 'b list *) | |
61 | ||
62 | ||
63 | val digit : char -> bool (* this and the seven vals below are predicates on char; their type fits the ('a -> bool) argument of pred and several when 'a is char. NOTE(review): the exact character set each one accepts is defined in the implementation *) | |
64 | val alpha : char -> bool | |
65 | val symbol : char -> bool | |
66 | val alphanum : char -> bool | |
67 | val space : char -> bool | |
68 | ||
69 | val alphanum_underscore : char -> bool | |
70 | val alphanum_minus : char -> bool | |
71 | val alphanum_under_minus : char -> bool | |
72 | ||
73 | val collect : char * char list -> string (* builds a string from a (char, char list) pair, which is the result shape of a char parser sequenced with many of a char parser. NOTE(review): character order is decided by the implementation -- confirm there *) | |
74 | val list_of_string : string -> char list (* turns a string into a char list, the input type of a (char, _) genp *) | |
75 | ||
76 | ||
77 | (*****************************************************************************) | |
78 | type token = (* lexical token; every constructor carries a string payload, including INT *) | |
79 | | IDENT of string | |
80 | | KWD of string | |
81 | | INT of string | |
82 | | SYM of string | |
83 | | STR of string | |
84 | ||
85 | val string_of_token : token -> string (* maps a token to a string. NOTE(review): whether this is the bare payload or a printed form is decided in the implementation *) | |
86 | ||
87 | type lexer = (char, token) genp (* i.e. char list -> token * char list: yields one token plus the remaining chars *) | |
88 | ||
89 | val rawident : lexer (* rawident, rawnumber, rawsymbol, rawkeyword and rawstring are all single-token lexers. NOTE(review): which token constructor each one yields is not visible in this signature *) | |
90 | (* char list -> token * char list *) | |
91 | val rawnumber : lexer | |
92 | (* char list -> token * char list *) | |
93 | ||
94 | val rawsymbol : lexer | |
95 | ||
96 | (* not space, not digit *) | |
97 | val rawkeyword : lexer | |
98 | (* char list -> token * char list *) | |
99 | ||
100 | val rawstring : lexer | |
101 | ||
102 | val lex_gen : lexer -> string -> token list (* given a single-token lexer and a string, returns a token list; presumably applies the lexer repeatedly over the whole string -- confirm in the implementation *) | |
103 | ||
104 | (*****************************************************************************) | |
ae4735db | 105 | val token : lexer |
34e49164 C |
106 | (* Lexer for a single token. char list -> token * char list *) |
107 | val tokens : (char, token list) genp | |
108 | (* Char parser yielding a list of tokens plus the remaining chars. char list -> token list * char list *) | |
109 | ||
110 | val alltokens : (char, token list) genp | |
111 | (* Same declared type as tokens; the more general type below leaves the remainder unconstrained. NOTE(review): presumably this variant requires the whole input to be consumed -- confirm. char list -> token list * 'a list *) | |
112 | ||
113 | val lex : string -> token list (* from a string to a token list; same shape as lex_gen once a lexer has been supplied *) | |
114 | ||
115 | ||
116 | (*****************************************************************************) | |
117 | (* can't use the name parser as it's a reserved word; a pparser takes a token list and yields an 'a plus the remaining tokens *) | |
118 | type 'a pparser = (token, 'a) genp | |
119 | ||
120 | val ident : string pparser (* ident, int and string below are token parsers that each yield a string; note that int yields a string, not an OCaml int *) | |
121 | (* token list -> string * token list *) | |
122 | val int : string pparser | |
123 | (* token list -> string * token list *) | |
124 | val string : string pparser | |
125 | ||
126 | type expr = (* expression tree: two leaf forms and two binary node forms *) | |
127 | | Int of int (* leaf carrying an OCaml int, unlike the INT token which carries a string *) | |
128 | | Var of string (* leaf carrying a string *) | |
129 | | Add of expr * expr (* binary node over two sub-expressions *) | |
130 | | Mul of expr * expr (* binary node over two sub-expressions *) | |
131 | ||
132 | val atom : expr pparser (* atom, factor, term and expr are token parsers that each yield an expr. NOTE(review): the grammar level each one covers is defined in the implementation *) | |
133 | (* token list -> expr * token list *) | |
134 | val factor : expr pparser | |
135 | (* token list -> expr * token list *) | |
136 | val term : expr pparser | |
137 | (* token list -> expr * token list *) | |
138 | val expr : expr pparser | |
139 | (* token list -> expr * 'a list *) | |
140 | ||
141 | val parse : 'a pparser -> string -> 'a | |
142 | (* Given a token parser and a string, returns the value of type 'a produced by the parser; presumably the string is lexed into tokens first -- confirm in the implementation. Unaliased, more general type: (token list -> 'a * 'b) -> string -> 'a *) | |
143 | ||
144 | ||
145 | (*****************************************************************************) | |
146 | ||
147 | module Infix : sig (* declares the three infix combinators with the same signatures as the top-level ones of this interface; presumably so that clients can open only the operators -- confirm *) | |
148 | val ( ||| ) : ('a, 'b) genp -> ('a, 'b) genp -> ('a, 'b) genp | |
149 | (* ('a -> 'b) -> ('a -> 'b) -> 'a -> 'b *) | |
150 | val ( +++ ) : ('a, 'b) genp -> ('a, 'c) genp -> ('a, 'b * 'c) genp | |
151 | (* ('a -> 'b * 'c) -> ('c -> 'd * 'e) -> 'a -> ('b * 'd) * 'e *) | |
152 | val ( >| ) : ('a, 'b) genp -> ('b -> 'c) -> ('a, 'c) genp | |
153 | (* ('a -> 'b * 'c) -> ('b -> 'd) -> 'a -> 'd * 'c *) | |
154 | end |