Import Upstream version 20180207
[hcoop/debian/mlton.git] / lib / mlton / basic / url.sig
CommitLineData
7f918cf1
CE
1(* Copyright (C) 2009 Matthew Fluet.
2 * Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh
3 * Jagannathan, and Stephen Weeks.
4 *
5 * MLton is released under a BSD-style license.
6 * See the file MLton-LICENSE for details.
7 *)
8
9(*
10 * URLs, as described in RFC 2396.
11 *
12 * For ease of programming, I merged all of the nested components into one type
13 * with lots of option components.
14 *)
15signature URL =
16 sig
17 structure Char:
18 sig
19 type t = Char.t
20
21 val escapeHex: t -> string
22 end
23
24 structure Scheme:
25 sig
26 datatype t =
27 File
28 | Ftp
29 | Gopher
30 | Http
31 | Https
32 | Telnet
33 end
34
35 structure Authority:
36 sig
37 type t = {host: string,
38 port: int option,
39 user: string option}
40
41 val equals: t * t -> bool
42 end
43
44 structure Path:
45 sig
46 type t = {isAbsolute: bool,
47 path: string list,
48 file: string}
49
50 val file: t -> string
51 val layout: t -> Layout.t
52 val root: t
53 val toString: t -> string
54 end
55
56 datatype t =
57 T of {authority: Authority.t option,
58 fragment: string option,
59 path: Path.t option,
60 query: string option,
61 scheme: Scheme.t option} (* NONE in relative urls *)
62 | JavaScript of string
63 | MailTo of string
64 | News of string
65 | Opaque of {scheme: string,
66 rest: string}
67
68 val addQuery: t * string -> t
69 val canonicalize: t -> t
70 val equals: t * t -> bool
71 val escape: string -> string (* Insert %XX escapes into string. *)
72 val escapeQuery: bool ref
73 val fromString: string -> t option
74 val getMatch: Regexp.Match.t -> t
75 val host: t -> string
76 val layout: t -> Layout.t
77 val mailto: string -> t
78 val path: t -> Path.t
79 (* relativize {base = b, relative = r}
80 * trys turn r into a url relative to b
81 *)
82 val relativize: {base: t, relative: t} -> t option
83 (* resolve {base = b, relative = r}
84 * interprets r relative to b, returning an absolute URL.
85 *)
86 val resolve: {base: t, relative: t} -> t
87 val toString: t -> string
88 val unescape: string -> string (* Remove %XX escapes from string. *)
89
90 structure Regexp:
91 sig
92 type t = Regexp.t
93
94 val absoluteUrl: t
95 val absPath: t
96 val authority: t
97 val query: t
98
99 val getAbsPath: Regexp.Match.t -> Path.t
100 val peekQuery: Regexp.Match.t -> string option
101 end
102 end
103
104functor TestUrl (S: URL): sig end =
105struct
106
107open S
108
109val _ =
110 Assert.assert
111 ("TestUrl.resolve", fn () =>
112 (* Examples from RFC 2396, Appendix C. *)
113 let
114 val base = valOf (fromString "http://a/b/c/d;p?q")
115 val examples =
116 [("g", "http://a/b/c/g"),
117 ("./g", "http://a/b/c/g"),
118 ("g/", "http://a/b/c/g/"),
119 ("/g", "http://a/g"),
120 ("?y", "http://a/b/c/?y"),
121 ("g?y", "http://a/b/c/g?y"),
122 ("#s", "http://a/b/c/d;p?q#s"),
123 ("g#s", "http://a/b/c/g#s"),
124 ("g?y#s", "http://a/b/c/g?y#s"),
125 (";x", "http://a/b/c/;x"),
126 ("g;x", "http://a/b/c/g;x"),
127 ("g;x?y#s", "http://a/b/c/g;x?y#s"),
128 (".", "http://a/b/c/"),
129 ("./", "http://a/b/c/"),
130 ("..", "http://a/b/"),
131 ("../", "http://a/b/"),
132 ("../g", "http://a/b/g"),
133 ("../..", "http://a/"),
134 ("../../", "http://a/"),
135 ("../../g", "http://a/g")]
136 fun checkResolve (rel, abs) =
137 abs = toString (resolve {base = base,
138 relative = valOf (fromString rel)})
139 val checkResolve =
140 Trace.trace2
141 ("TestUrl.checkResolve", String.layout, String.layout, Bool.layout)
142 checkResolve
143 in List.forall ([("g:h", "g:h"), ("//g", "http://g")],
144 checkResolve)
145 andalso
146 List.forall
147 (examples, fn (rel, abs) =>
148 checkResolve (rel, abs) andalso
149 checkResolve (toString
150 (valOf
151 (relativize {base = base,
152 relative = valOf (fromString abs)})),
153 abs))
154 end)
155
156val _ =
157 Assert.assert
158 ("TestUrl", fn () =>
159 fromString "mailto:sweeks@sweeks.com" = SOME (MailTo "sweeks@sweeks.com")
160 andalso isSome
161 (fromString "http://sports.latimes.com/RealMedia/ads/adstream_lx.ads/sports.latimes.com/stats/oth/oth/oth/columnists.html/21801/Top/NextCardGW002/u40_card_dreamer_V3.gif/63306138643531333339663061393230")
162 andalso isSome (fromString
163 "http://dps1.travelocity.com:80/airpprice.ctl?previous_page=airpdisp&mixed_gt=N&tkt_status=N&option_num=1&seg_for_sell=1%26SJC%26San%20Jose,%20CA%2620001123%260750%26AA%26American%20Airlines%262456%26L%260%26McDonnell%20Douglas%20SP80%20Jet%26DFW%26Dallas%2fFt%20Worth,%20TX%261313%2620001123%26Thursday%26%26%26S80%26Y|1%26DFW%26Dallas%2fFt%20Worth,%20TX%2620001123%261433%26AA%26American%20Airlines%263741%26L%260%26Embraer%20ERJ-145%20Jet%26OKC%26Oklahoma%20City,%20OK%261529%2620001123%26Thursday%26%26%26ER4%26Y%3a1%26DFW%26Dallas%2fFt%20Worth,%20TX%2620001126%260918%26AA%26American%20Airlines%262451%26V%260%26McDonnell%20Douglas%20SP80%20Jet%26SJC%26San%20Jose,%20CA%261057%2620001126%26Sunday%26%26%26S80%26Y&hold_flag=N&SEQ=97122479938121310102000&LANG=EN&last_pgd_page=airpdisp.pgd")
164 andalso isSome (fromString
165 "large-int.html#SIG:INT_INF.\\|@LT\\|\\|@LT\\|:VAL:SPEC")
166 andalso
167 List.forall
168 ([("http://Norma140.emp3.com/cgibin/optin/remove.pl",
169 SOME Scheme.Http, NONE, "Norma140.emp3.com", NONE,
170 SOME (true, ["cgibin", "optin"], "remove.pl"),
171 NONE, NONE),
172 ("http://s7.sprintpcs.com/store/..\\store\\cc_Popup_aa.asp",
173 SOME Scheme.Http, NONE, "s7.sprintpcs.com", NONE,
174 SOME (true, ["store"], "..\\store\\cc_Popup_aa.asp"),
175 NONE, NONE),
176 ("http://www.sds.lcs.mit.edu/spd/larch/",
177 SOME Scheme.Http, NONE, "www.sds.lcs.mit.edu", NONE,
178 SOME (true, ["spd", "larch"], ""),
179 NONE, NONE),
180 ("http://foo.com/hello",
181 SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, [], "hello"),
182 NONE, NONE),
183 ("http://foo.com/hello/",
184 SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, ["hello"], ""),
185 NONE, NONE),
186 ("http://foo.com",
187 SOME Scheme.Http, NONE, "foo.com", NONE, NONE, NONE, NONE),
188 ("http://foo.com/",
189 SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, [], ""), NONE, NONE),
190 ("ftp://bar.com:80/yes/now",
191 SOME Scheme.Ftp, NONE, "bar.com", SOME 80, SOME (true, ["yes"], "now"), NONE,
192 NONE),
193 ("http://z.com/foo?site=http://w.com/~zzz",
194 SOME Scheme.Http, NONE, "z.com", NONE, SOME (true, [], "foo"),
195 SOME "site=http://w.com/~zzz", NONE),
196 ("http://sweeks@foo.com/yes?really#here",
197 SOME Scheme.Http, SOME "sweeks", "foo.com", NONE, SOME (true, [], "yes"),
198 SOME "really", SOME "here"),
199 ("http://a.com/foo?%79%65%73%2e",
200 SOME Scheme.Http, NONE, "a.com", NONE, SOME (true, [], "foo"), SOME "yes.", NONE),
201 ("http://foo.com/a?b%20c",
202 SOME Scheme.Http, NONE, "foo.com", NONE,
203 SOME (true, [], "a"),
204 SOME "b c",
205 NONE),
206 ("http://community.cnn.com/cgi-bin/WebX?13@236.FzKWcVUHjLB^0@.ee7bada",
207 SOME Scheme.Http, NONE, "community.cnn.com", NONE,
208 SOME (true, ["cgi-bin"], "WebX"),
209 SOME "13@236.FzKWcVUHjLB^0@.ee7bada",
210 NONE),
211 ("//foo.com/z", NONE, NONE, "foo.com", NONE, SOME (true, [], "z"), NONE, NONE),
212 ("http://ad.doubleclick.net/adj/N674.briefing.com/B22024;abr=!ie;sz=125x125;ord= [timestamp]?",
213 SOME Scheme.Http, NONE, "ad.doubleclick.net", NONE,
214 SOME (true, ["adj", "N674.briefing.com"],
215 "B22024;abr=!ie;sz=125x125;ord= [timestamp]"),
216 SOME "",
217 NONE),
218 ("http://tac.eecs.umich.edu/cgi-bin/botuser/ViewAccount?VIEW=INFO&VIEWALL= [bad label VIEWALL]",
219 SOME Scheme.Http, NONE, "tac.eecs.umich.edu", NONE,
220 SOME (true, ["cgi-bin", "botuser"], "ViewAccount"),
221 SOME "VIEW=INFO&VIEWALL= [bad label VIEWALL]",
222 NONE),
223 ("http://phase2media.doubleclick.net/adj/ag.aol.p2m.com/asconfirmcollections;kw=shortandpunchy;kw1=;kw2=;abr=!ie;pos=1;sz=125x125;tile=10;ord=964739477869?\"",
224 SOME Scheme.Http, NONE, "phase2media.doubleclick.net", NONE,
225 SOME (true, ["adj", "ag.aol.p2m.com"], "asconfirmcollections;kw=shortandpunchy;kw1=;kw2=;abr=!ie;pos=1;sz=125x125;tile=10;ord=964739477869"),
226 SOME "\"",
227 NONE)
228 ],
229 fn (s, scheme, user, host, port, path, query, fragment) =>
230 valOf (fromString s)
231 = T {scheme = scheme,
232 authority = SOME {user = user,
233 host = host,
234 port = port},
235 path = Option.map (path, fn (i, p, f) => {isAbsolute = i,
236 path = p, file = f}),
237 query = query,
238 fragment = fragment}))
239
240end