Import Upstream version 20180207
[hcoop/debian/mlton.git] / lib / mlton / basic / regexp.sig
CommitLineData
7f918cf1
CE
1(* Copyright (C) 2009 Matthew Fluet.
2 * Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
3 * Jagannathan, and Stephen Weeks.
4 *
5 * MLton is released under a BSD-style license.
6 * See the file MLton-LICENSE for details.
7 *)
8
9signature REGEXP_STRUCTS =
10 sig
11 end
12
13signature REGEXP =
14 sig
15 include REGEXP_STRUCTS
16
17 structure Save:
18 sig
19 type t
20
21 val new: unit -> t
22 end
23
24 structure Match:
25 sig
26 type t
27
28 val all: t -> Substring.t
29 val startLength: t -> {start: int, length: int}
30 val exists: t * Save.t -> bool
31 val funs: t -> {exists: Save.t -> bool,
32 lookup: Save.t -> Substring.t,
33 peek: Save.t -> Substring.t option}
34 val length: t -> int
35 val lookup: t * Save.t -> Substring.t
36 val lookupString: t * Save.t -> String.t
37 val peek: t * Save.t -> Substring.t option
38 val peekString: t * Save.t -> String.t option
39 val stringFuns: t -> {exists: Save.t -> bool,
40 lookup: Save.t -> String.t,
41 peek: Save.t -> String.t option}
42 end
43
44 structure Compiled:
45 sig
46 type t
47
48 (* Find the first substring of s starting at or after index i that
49 * matches r. Return the first character and the character just
50 * past the end of the substring.
51 *)
52 val findShort: t * string * int -> Match.t option
53 val findLong: t * string * int -> Match.t option
54 val foreachMatchShort: t * string * (Match.t -> unit) -> unit
55 val layout: t -> Layout.t
56 val layoutDot: t -> Layout.t
57 val layoutDotToFile: t * File.t -> unit
58 (* match (r, s, i)
59 * Return the (shortest or longest) substring of s starting at index
60 * i that matches r.
61 * The substring is represented by the index of the character just
62 * past its end.
63 * Return NONE if there is NO match.
64 * All of the saves in the match will be set.
65 *)
66 val matchAll: t * string -> Match.t option
67 val matchLong: t * string * int -> Match.t option
68 val matchShort: t * string * int -> Match.t option
69 val matchesAll: t * string -> bool
70 val matchesPrefix: t * string -> bool
71 end
72
73 type t
74
75 val anchorFinish: t
76 val anchorStart: t
77 val any: t (* arbitrary character *)
78 val anys: t (* arbitrary number of characters *)
79 val ascii: t (* arbitrary ascii character *)
80 val asciis: t (* arbitrary ascii characters *)
81 val char: char -> t
82 val compileDFA: t -> Compiled.t
83 val compileNFA: t -> Compiled.t
84 val digit: t
85 val digits: t
86 val dquote: t (* char #"\"" *)
87 val fromString: string -> (t * Save.t vector) option
88 val isChar: (char -> bool) -> t
89 val isNotChar: (char -> bool) -> t
90 val layout: t -> Layout.t
91 val nonDigit: t
92 val none: t
93 val notChar: char -> t
94 val notOneOf: string -> t
95 val null: t (* empty string *)
96 val oneOf: string -> t
97 val oneOrMore: t -> t
98 val optional: t -> t
99 val or: t list -> t
100 val save: t * Save.t -> t
101 val seq: t list -> t
102 val spaces: t (* star (isChar Char.isSpace) *)
103 val star: t -> t
104 val string: string -> t (* case matters *)
105 val stringIgnoreCase: string -> t (* case doesn't matter *)
106 val toString: t -> string
107 val zeroOrMore: t -> t (* same as star *)
108 end
109
110
111functor TestRegexp (S: REGEXP): sig end =
112struct
113
114val _ = print "TestRegexp\n"
115
116open S
117open Compiled
118val compile = if true then compileNFA else compileDFA
119
120val _ =
121 Assert.assert
122 ("TestRegexp.save", fn () =>
123 let
124 val s = Save.new ()
125 in
126 List.forall
127 ([(save (seq [], s), "", ""),
128 (save (star (oneOf "a"), s), "", ""),
129 (seq [save (seq [], s), seq []], "", ""),
130 (seq [oneOf "a", save (seq [], s)], "a", "")],
131 fn (r, s1, s2) =>
132 let
133 val c = compile r
134 in
135 case matchAll (c, s1) of
136 NONE => false
137 | SOME m => Match.lookupString (m, s) = s2
138 end)
139 end)
140
141val _ =
142 Assert.assert
143 ("TestRegexp.doesMatchAll", fn () =>
144 List.forall ([(any, "a"),
145 (anys, "abc")],
146 fn (r, s) => matchesAll (compile r, s)))
147val tests =
148 List.map ([
149 ("\\a", "a"),
150 ("^$", ""),
151 ("abc", "abc"),
152 (".", "a"),
153 ("^foo$", "foo"),
154 ("^...$", "foo"),
155 ("^.*$", "foo"),
156 ("^.*foo@bar\\.com$", "foo@bar.com"),
157 ("(abc)","abc"),
158 ("\\(abc\\)","(abc)"),
159 ("(abc){2,4}$", "abcabc"),
160 ("(abc){2,4}$", "abcabcabc"),
161 ("(abc){2,4}$", "abcabcabcabc")
162 ],
163 fn (r, s) =>
164 let
165 val opt = SOME (String.size s)
166 in
167 (#1 (valOf (fromString r)), s, opt, opt)
168 end)
169 @
170 [
171 (#1 (valOf (fromString "a")), "a", SOME 1, SOME 1),
172 (#1 (valOf (fromString "a*")), "a", SOME 0, SOME 1),
173 (#1 (valOf (fromString "a+")), "a", SOME 1, SOME 1),
174 (#1 (valOf (fromString "a+")), "aa", SOME 1, SOME 2),
175 (#1 (valOf (fromString "[^a]")), "a", NONE, NONE),
176 (#1 (valOf (fromString "[^a]")), "b", SOME 1, SOME 1),
177 (stringIgnoreCase "abc", "abc", SOME (3: int), SOME (3: int)),
178 (stringIgnoreCase "abc", "aBC", SOME 3, SOME 3),
179 (stringIgnoreCase "ab", "abab", SOME 2, SOME 2),
180 (string "abc", "abc", SOME 3, SOME 3),
181 (string "Abc", "abc", NONE, NONE),
182 (seq [anchorStart, anchorFinish], "", SOME 0, SOME 0),
183 (seq [anchorStart, string "abc", anchorFinish], "abc", SOME 3, SOME 3),
184 (seq [or [null, anchorFinish], string "a"], "a", SOME 1, SOME 1),
185 (seq [or [anchorFinish, null], string "a"], "a", SOME 1, SOME 1),
186 (seq [], "abc", SOME 0, SOME 0),
187 (seq [string "ab"], "ab", SOME 2, SOME 2),
188 (seq [char #"a", char #"b", char #"c"], "abc", SOME 3, SOME 3),
189 (seq [string "ab", null], "abc", SOME 2, SOME 2),
190 (or [string "a", string "ab", string "abc"], "abc", SOME 1, SOME 3),
191 (seq [or [string "ab", null],
192 or [string "abcde", string "cd"]], "abcde",
193 SOME 4, SOME 5),
194 (star (or [null, char #"a"]), "aaa", SOME 0, SOME 3),
195 (star (string "ab"), "ababab", SOME 0, SOME 6),
196 let val r = Save.new ()
197 in (save (string "ab", r), "ab", SOME 2, SOME 2)
198 end,
199 let val r = Save.new ()
200 in (seq [string "a", save (string "bc", r), string "d"],
201 "abcd", SOME 4, SOME 4)
202 end,
203 let val s1 = Save.new ()
204 val s2 = Save.new ()
205 in (seq [save (string "a", s1),
206 save (string "b", s2)],
207 "ab", SOME 2, SOME 2)
208 end,
209 let val s1 = Save.new ()
210 val s2 = Save.new ()
211 in (seq [save (string "a", s1),
212 string "b",
213 save (string "c", s2),
214 string "d"],
215 "abcd",
216 SOME 4, SOME 4)
217 end,
218let val s1 = Save.new ()
219in (seq [string "a",
220 save (string "b", s1),
221 string "c"],
222 "abc",
223 SOME 3, SOME 3)
224end,
225let val s1 = Save.new ()
226in (seq [string "abc",
227 save (string "d", s1),
228 string "e"],
229 "abcde",
230 SOME 5, SOME 5)
231end,
232let val s1 = Save.new ()
233 val s2 = Save.new ()
234in (seq [string "abc",
235 save (string "d", s1),
236 string "e",
237 save (string "f", s2)],
238 "abcdef",
239 SOME 6, SOME 6)
240end,
241let val s1 = Save.new ()
242 val s2 = Save.new ()
243in (seq [string "abc",
244 save (string "d", s1),
245 string "e",
246 save (string "fgh", s2)],
247 "abcdefgh",
248 SOME 8, SOME 8)
249end
250]
251
252val _ =
253 Assert.assert
254 ("Test.Regexp.match", fn () =>
255 List.forall (tests,
256 fn (r, s: string, i1, i2) =>
257 let
258 val r = compile r
259 val _ = Compiled.layoutDotToFile (r, "/tmp/z.dot")
260 fun doit m = Option.map (m (r, s, 0), Match.length)
261 in
262 i1 = doit matchShort
263 andalso i2 = doit matchLong
264 end))
265
266val tests =
267 [(string "abc", "123abc", SOME (3: int, 6: int)),
268 (string "abc", "123abcde", SOME (3, 6)),
269 (string "abd", "123abcde", NONE),
270 (seq [string "a", star (string "ab"), string "c"],
271 "1234aabababcdef", SOME (4, 12))]
272
273val _ =
274 Assert.assert
275 ("Regexp.findShort", fn () =>
276 List.forall
277 (tests, fn (r, s, opt) =>
278 opt = (Option.map
279 (findShort (compile r, s, 0), fn m =>
280 let val (_, {start, length}) = Substring.base (Match.all m)
281 in (start, start + length)
282 end))))
283
284val _ =
285 Assert.assert
286 ("Regexp.findShort2", fn () =>
287 List.forall
288 ([(SOME (2, 4), (string "cd", "abcdef", 0)),
289 (SOME (2, 4), (seq [char #"c", star (isNotChar Char.isSpace)],
290 "abcd fg", 0))],
291 fn (res, (r, s, i)) =>
292 res =
293 Option.map (findLong (compile r, s, i), fn m =>
294 let val (_, {start, length}) = Substring.base (Match.all m)
295 in (start, start + length)
296 end)))
297
298end