Commit | Line | Data |
---|---|---|
7f918cf1 CE |
1 | (* Copyright (C) 2009 Matthew Fluet. |
2 | * Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh | |
3 | * Jagannathan, and Stephen Weeks. | |
4 | * | |
5 | * MLton is released under a BSD-style license. | |
6 | * See the file MLton-LICENSE for details. | |
7 | *) | |
8 | ||
9 | (* | |
10 | * URLs, as described in RFC 2396. | |
11 | * | |
12 | * For ease of programming, I merged all of the nested components into one type | |
13 | * with lots of option components. | |
14 | *) | |
15 | signature URL = | |
16 | sig | |
17 | structure Char: | |
18 | sig | |
19 | type t = Char.t | |
20 | ||
21 | val escapeHex: t -> string | |
22 | end | |
23 | ||
24 | structure Scheme: | |
25 | sig | |
26 | datatype t = | |
27 | File | |
28 | | Ftp | |
29 | | Gopher | |
30 | | Http | |
31 | | Https | |
32 | | Telnet | |
33 | end | |
34 | ||
35 | structure Authority: | |
36 | sig | |
37 | type t = {host: string, | |
38 | port: int option, | |
39 | user: string option} | |
40 | ||
41 | val equals: t * t -> bool | |
42 | end | |
43 | ||
44 | structure Path: | |
45 | sig | |
46 | type t = {isAbsolute: bool, | |
47 | path: string list, | |
48 | file: string} | |
49 | ||
50 | val file: t -> string | |
51 | val layout: t -> Layout.t | |
52 | val root: t | |
53 | val toString: t -> string | |
54 | end | |
55 | ||
56 | datatype t = | |
57 | T of {authority: Authority.t option, | |
58 | fragment: string option, | |
59 | path: Path.t option, | |
60 | query: string option, | |
61 | scheme: Scheme.t option} (* NONE in relative urls *) | |
62 | | JavaScript of string | |
63 | | MailTo of string | |
64 | | News of string | |
65 | | Opaque of {scheme: string, | |
66 | rest: string} | |
67 | ||
68 | val addQuery: t * string -> t | |
69 | val canonicalize: t -> t | |
70 | val equals: t * t -> bool | |
71 | val escape: string -> string (* Insert %XX escapes into string. *) | |
72 | val escapeQuery: bool ref | |
73 | val fromString: string -> t option | |
74 | val getMatch: Regexp.Match.t -> t | |
75 | val host: t -> string | |
76 | val layout: t -> Layout.t | |
77 | val mailto: string -> t | |
78 | val path: t -> Path.t | |
79 | (* relativize {base = b, relative = r} | |
80 | * trys turn r into a url relative to b | |
81 | *) | |
82 | val relativize: {base: t, relative: t} -> t option | |
83 | (* resolve {base = b, relative = r} | |
84 | * interprets r relative to b, returning an absolute URL. | |
85 | *) | |
86 | val resolve: {base: t, relative: t} -> t | |
87 | val toString: t -> string | |
88 | val unescape: string -> string (* Remove %XX escapes from string. *) | |
89 | ||
90 | structure Regexp: | |
91 | sig | |
92 | type t = Regexp.t | |
93 | ||
94 | val absoluteUrl: t | |
95 | val absPath: t | |
96 | val authority: t | |
97 | val query: t | |
98 | ||
99 | val getAbsPath: Regexp.Match.t -> Path.t | |
100 | val peekQuery: Regexp.Match.t -> string option | |
101 | end | |
102 | end | |
103 | ||
104 | functor TestUrl (S: URL): sig end = | |
105 | struct | |
106 | ||
107 | open S | |
108 | ||
109 | val _ = | |
110 | Assert.assert | |
111 | ("TestUrl.resolve", fn () => | |
112 | (* Examples from RFC 2396, Appendix C. *) | |
113 | let | |
114 | val base = valOf (fromString "http://a/b/c/d;p?q") | |
115 | val examples = | |
116 | [("g", "http://a/b/c/g"), | |
117 | ("./g", "http://a/b/c/g"), | |
118 | ("g/", "http://a/b/c/g/"), | |
119 | ("/g", "http://a/g"), | |
120 | ("?y", "http://a/b/c/?y"), | |
121 | ("g?y", "http://a/b/c/g?y"), | |
122 | ("#s", "http://a/b/c/d;p?q#s"), | |
123 | ("g#s", "http://a/b/c/g#s"), | |
124 | ("g?y#s", "http://a/b/c/g?y#s"), | |
125 | (";x", "http://a/b/c/;x"), | |
126 | ("g;x", "http://a/b/c/g;x"), | |
127 | ("g;x?y#s", "http://a/b/c/g;x?y#s"), | |
128 | (".", "http://a/b/c/"), | |
129 | ("./", "http://a/b/c/"), | |
130 | ("..", "http://a/b/"), | |
131 | ("../", "http://a/b/"), | |
132 | ("../g", "http://a/b/g"), | |
133 | ("../..", "http://a/"), | |
134 | ("../../", "http://a/"), | |
135 | ("../../g", "http://a/g")] | |
136 | fun checkResolve (rel, abs) = | |
137 | abs = toString (resolve {base = base, | |
138 | relative = valOf (fromString rel)}) | |
139 | val checkResolve = | |
140 | Trace.trace2 | |
141 | ("TestUrl.checkResolve", String.layout, String.layout, Bool.layout) | |
142 | checkResolve | |
143 | in List.forall ([("g:h", "g:h"), ("//g", "http://g")], | |
144 | checkResolve) | |
145 | andalso | |
146 | List.forall | |
147 | (examples, fn (rel, abs) => | |
148 | checkResolve (rel, abs) andalso | |
149 | checkResolve (toString | |
150 | (valOf | |
151 | (relativize {base = base, | |
152 | relative = valOf (fromString abs)})), | |
153 | abs)) | |
154 | end) | |
155 | ||
156 | val _ = | |
157 | Assert.assert | |
158 | ("TestUrl", fn () => | |
159 | fromString "mailto:sweeks@sweeks.com" = SOME (MailTo "sweeks@sweeks.com") | |
160 | andalso isSome | |
161 | (fromString "http://sports.latimes.com/RealMedia/ads/adstream_lx.ads/sports.latimes.com/stats/oth/oth/oth/columnists.html/21801/Top/NextCardGW002/u40_card_dreamer_V3.gif/63306138643531333339663061393230") | |
162 | andalso isSome (fromString | |
163 | "http://dps1.travelocity.com:80/airpprice.ctl?previous_page=airpdisp&mixed_gt=N&tkt_status=N&option_num=1&seg_for_sell=1%26SJC%26San%20Jose,%20CA%2620001123%260750%26AA%26American%20Airlines%262456%26L%260%26McDonnell%20Douglas%20SP80%20Jet%26DFW%26Dallas%2fFt%20Worth,%20TX%261313%2620001123%26Thursday%26%26%26S80%26Y|1%26DFW%26Dallas%2fFt%20Worth,%20TX%2620001123%261433%26AA%26American%20Airlines%263741%26L%260%26Embraer%20ERJ-145%20Jet%26OKC%26Oklahoma%20City,%20OK%261529%2620001123%26Thursday%26%26%26ER4%26Y%3a1%26DFW%26Dallas%2fFt%20Worth,%20TX%2620001126%260918%26AA%26American%20Airlines%262451%26V%260%26McDonnell%20Douglas%20SP80%20Jet%26SJC%26San%20Jose,%20CA%261057%2620001126%26Sunday%26%26%26S80%26Y&hold_flag=N&SEQ=97122479938121310102000&LANG=EN&last_pgd_page=airpdisp.pgd") | |
164 | andalso isSome (fromString | |
165 | "large-int.html#SIG:INT_INF.\\|@LT\\|\\|@LT\\|:VAL:SPEC") | |
166 | andalso | |
167 | List.forall | |
168 | ([("http://Norma140.emp3.com/cgibin/optin/remove.pl", | |
169 | SOME Scheme.Http, NONE, "Norma140.emp3.com", NONE, | |
170 | SOME (true, ["cgibin", "optin"], "remove.pl"), | |
171 | NONE, NONE), | |
172 | ("http://s7.sprintpcs.com/store/..\\store\\cc_Popup_aa.asp", | |
173 | SOME Scheme.Http, NONE, "s7.sprintpcs.com", NONE, | |
174 | SOME (true, ["store"], "..\\store\\cc_Popup_aa.asp"), | |
175 | NONE, NONE), | |
176 | ("http://www.sds.lcs.mit.edu/spd/larch/", | |
177 | SOME Scheme.Http, NONE, "www.sds.lcs.mit.edu", NONE, | |
178 | SOME (true, ["spd", "larch"], ""), | |
179 | NONE, NONE), | |
180 | ("http://foo.com/hello", | |
181 | SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, [], "hello"), | |
182 | NONE, NONE), | |
183 | ("http://foo.com/hello/", | |
184 | SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, ["hello"], ""), | |
185 | NONE, NONE), | |
186 | ("http://foo.com", | |
187 | SOME Scheme.Http, NONE, "foo.com", NONE, NONE, NONE, NONE), | |
188 | ("http://foo.com/", | |
189 | SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, [], ""), NONE, NONE), | |
190 | ("ftp://bar.com:80/yes/now", | |
191 | SOME Scheme.Ftp, NONE, "bar.com", SOME 80, SOME (true, ["yes"], "now"), NONE, | |
192 | NONE), | |
193 | ("http://z.com/foo?site=http://w.com/~zzz", | |
194 | SOME Scheme.Http, NONE, "z.com", NONE, SOME (true, [], "foo"), | |
195 | SOME "site=http://w.com/~zzz", NONE), | |
196 | ("http://sweeks@foo.com/yes?really#here", | |
197 | SOME Scheme.Http, SOME "sweeks", "foo.com", NONE, SOME (true, [], "yes"), | |
198 | SOME "really", SOME "here"), | |
199 | ("http://a.com/foo?%79%65%73%2e", | |
200 | SOME Scheme.Http, NONE, "a.com", NONE, SOME (true, [], "foo"), SOME "yes.", NONE), | |
201 | ("http://foo.com/a?b%20c", | |
202 | SOME Scheme.Http, NONE, "foo.com", NONE, | |
203 | SOME (true, [], "a"), | |
204 | SOME "b c", | |
205 | NONE), | |
206 | ("http://community.cnn.com/cgi-bin/WebX?13@236.FzKWcVUHjLB^0@.ee7bada", | |
207 | SOME Scheme.Http, NONE, "community.cnn.com", NONE, | |
208 | SOME (true, ["cgi-bin"], "WebX"), | |
209 | SOME "13@236.FzKWcVUHjLB^0@.ee7bada", | |
210 | NONE), | |
211 | ("//foo.com/z", NONE, NONE, "foo.com", NONE, SOME (true, [], "z"), NONE, NONE), | |
212 | ("http://ad.doubleclick.net/adj/N674.briefing.com/B22024;abr=!ie;sz=125x125;ord= [timestamp]?", | |
213 | SOME Scheme.Http, NONE, "ad.doubleclick.net", NONE, | |
214 | SOME (true, ["adj", "N674.briefing.com"], | |
215 | "B22024;abr=!ie;sz=125x125;ord= [timestamp]"), | |
216 | SOME "", | |
217 | NONE), | |
218 | ("http://tac.eecs.umich.edu/cgi-bin/botuser/ViewAccount?VIEW=INFO&VIEWALL= [bad label VIEWALL]", | |
219 | SOME Scheme.Http, NONE, "tac.eecs.umich.edu", NONE, | |
220 | SOME (true, ["cgi-bin", "botuser"], "ViewAccount"), | |
221 | SOME "VIEW=INFO&VIEWALL= [bad label VIEWALL]", | |
222 | NONE), | |
223 | ("http://phase2media.doubleclick.net/adj/ag.aol.p2m.com/asconfirmcollections;kw=shortandpunchy;kw1=;kw2=;abr=!ie;pos=1;sz=125x125;tile=10;ord=964739477869?\"", | |
224 | SOME Scheme.Http, NONE, "phase2media.doubleclick.net", NONE, | |
225 | SOME (true, ["adj", "ag.aol.p2m.com"], "asconfirmcollections;kw=shortandpunchy;kw1=;kw2=;abr=!ie;pos=1;sz=125x125;tile=10;ord=964739477869"), | |
226 | SOME "\"", | |
227 | NONE) | |
228 | ], | |
229 | fn (s, scheme, user, host, port, path, query, fragment) => | |
230 | valOf (fromString s) | |
231 | = T {scheme = scheme, | |
232 | authority = SOME {user = user, | |
233 | host = host, | |
234 | port = port}, | |
235 | path = Option.map (path, fn (i, p, f) => {isAbsolute = i, | |
236 | path = p, file = f}), | |
237 | query = query, | |
238 | fragment = fragment})) | |
239 | ||
240 | end |