| 1 | (* Copyright (C) 2009 Matthew Fluet. |
| 2 | * Copyright (C) 1999-2006 Henry Cejtin, Matthew Fluet, Suresh |
| 3 | * Jagannathan, and Stephen Weeks. |
| 4 | * |
| 5 | * MLton is released under a BSD-style license. |
| 6 | * See the file MLton-LICENSE for details. |
| 7 | *) |
| 8 | |
| 9 | (* |
| 10 | * URLs, as described in RFC 2396. |
| 11 | * |
| 12 | * For ease of programming, I merged all of the nested components into one type |
| 13 | * with lots of option components. |
| 14 | *) |
| 15 | signature URL = |
| 16 | sig |
| 17 | structure Char: |
| 18 | sig |
| 19 | type t = Char.t |
| 20 | |
| 21 | val escapeHex: t -> string |
| 22 | end |
| 23 | |
| 24 | structure Scheme: |
| 25 | sig |
| 26 | datatype t = |
| 27 | File |
| 28 | | Ftp |
| 29 | | Gopher |
| 30 | | Http |
| 31 | | Https |
| 32 | | Telnet |
| 33 | end |
| 34 | |
| 35 | structure Authority: |
| 36 | sig |
| 37 | type t = {host: string, |
| 38 | port: int option, |
| 39 | user: string option} |
| 40 | |
| 41 | val equals: t * t -> bool |
| 42 | end |
| 43 | |
| 44 | structure Path: |
| 45 | sig |
| 46 | type t = {isAbsolute: bool, |
| 47 | path: string list, |
| 48 | file: string} |
| 49 | |
| 50 | val file: t -> string |
| 51 | val layout: t -> Layout.t |
| 52 | val root: t |
| 53 | val toString: t -> string |
| 54 | end |
| 55 | |
| 56 | datatype t = |
| 57 | T of {authority: Authority.t option, |
| 58 | fragment: string option, |
| 59 | path: Path.t option, |
| 60 | query: string option, |
| 61 | scheme: Scheme.t option} (* NONE in relative urls *) |
| 62 | | JavaScript of string |
| 63 | | MailTo of string |
| 64 | | News of string |
| 65 | | Opaque of {scheme: string, |
| 66 | rest: string} |
| 67 | |
| 68 | val addQuery: t * string -> t |
| 69 | val canonicalize: t -> t |
| 70 | val equals: t * t -> bool |
| 71 | val escape: string -> string (* Insert %XX escapes into string. *) |
| 72 | val escapeQuery: bool ref |
| 73 | val fromString: string -> t option |
| 74 | val getMatch: Regexp.Match.t -> t |
| 75 | val host: t -> string |
| 76 | val layout: t -> Layout.t |
| 77 | val mailto: string -> t |
| 78 | val path: t -> Path.t |
| 79 | (* relativize {base = b, relative = r} |
| 80 | * trys turn r into a url relative to b |
| 81 | *) |
| 82 | val relativize: {base: t, relative: t} -> t option |
| 83 | (* resolve {base = b, relative = r} |
| 84 | * interprets r relative to b, returning an absolute URL. |
| 85 | *) |
| 86 | val resolve: {base: t, relative: t} -> t |
| 87 | val toString: t -> string |
| 88 | val unescape: string -> string (* Remove %XX escapes from string. *) |
| 89 | |
| 90 | structure Regexp: |
| 91 | sig |
| 92 | type t = Regexp.t |
| 93 | |
| 94 | val absoluteUrl: t |
| 95 | val absPath: t |
| 96 | val authority: t |
| 97 | val query: t |
| 98 | |
| 99 | val getAbsPath: Regexp.Match.t -> Path.t |
| 100 | val peekQuery: Regexp.Match.t -> string option |
| 101 | end |
| 102 | end |
| 103 | |
| 104 | functor TestUrl (S: URL): sig end = |
| 105 | struct |
| 106 | |
| 107 | open S |
| 108 | |
| 109 | val _ = |
| 110 | Assert.assert |
| 111 | ("TestUrl.resolve", fn () => |
| 112 | (* Examples from RFC 2396, Appendix C. *) |
| 113 | let |
| 114 | val base = valOf (fromString "http://a/b/c/d;p?q") |
| 115 | val examples = |
| 116 | [("g", "http://a/b/c/g"), |
| 117 | ("./g", "http://a/b/c/g"), |
| 118 | ("g/", "http://a/b/c/g/"), |
| 119 | ("/g", "http://a/g"), |
| 120 | ("?y", "http://a/b/c/?y"), |
| 121 | ("g?y", "http://a/b/c/g?y"), |
| 122 | ("#s", "http://a/b/c/d;p?q#s"), |
| 123 | ("g#s", "http://a/b/c/g#s"), |
| 124 | ("g?y#s", "http://a/b/c/g?y#s"), |
| 125 | (";x", "http://a/b/c/;x"), |
| 126 | ("g;x", "http://a/b/c/g;x"), |
| 127 | ("g;x?y#s", "http://a/b/c/g;x?y#s"), |
| 128 | (".", "http://a/b/c/"), |
| 129 | ("./", "http://a/b/c/"), |
| 130 | ("..", "http://a/b/"), |
| 131 | ("../", "http://a/b/"), |
| 132 | ("../g", "http://a/b/g"), |
| 133 | ("../..", "http://a/"), |
| 134 | ("../../", "http://a/"), |
| 135 | ("../../g", "http://a/g")] |
| 136 | fun checkResolve (rel, abs) = |
| 137 | abs = toString (resolve {base = base, |
| 138 | relative = valOf (fromString rel)}) |
| 139 | val checkResolve = |
| 140 | Trace.trace2 |
| 141 | ("TestUrl.checkResolve", String.layout, String.layout, Bool.layout) |
| 142 | checkResolve |
| 143 | in List.forall ([("g:h", "g:h"), ("//g", "http://g")], |
| 144 | checkResolve) |
| 145 | andalso |
| 146 | List.forall |
| 147 | (examples, fn (rel, abs) => |
| 148 | checkResolve (rel, abs) andalso |
| 149 | checkResolve (toString |
| 150 | (valOf |
| 151 | (relativize {base = base, |
| 152 | relative = valOf (fromString abs)})), |
| 153 | abs)) |
| 154 | end) |
| 155 | |
| 156 | val _ = |
| 157 | Assert.assert |
| 158 | ("TestUrl", fn () => |
| 159 | fromString "mailto:sweeks@sweeks.com" = SOME (MailTo "sweeks@sweeks.com") |
| 160 | andalso isSome |
| 161 | (fromString "http://sports.latimes.com/RealMedia/ads/adstream_lx.ads/sports.latimes.com/stats/oth/oth/oth/columnists.html/21801/Top/NextCardGW002/u40_card_dreamer_V3.gif/63306138643531333339663061393230") |
| 162 | andalso isSome (fromString |
| 163 | "http://dps1.travelocity.com:80/airpprice.ctl?previous_page=airpdisp&mixed_gt=N&tkt_status=N&option_num=1&seg_for_sell=1%26SJC%26San%20Jose,%20CA%2620001123%260750%26AA%26American%20Airlines%262456%26L%260%26McDonnell%20Douglas%20SP80%20Jet%26DFW%26Dallas%2fFt%20Worth,%20TX%261313%2620001123%26Thursday%26%26%26S80%26Y|1%26DFW%26Dallas%2fFt%20Worth,%20TX%2620001123%261433%26AA%26American%20Airlines%263741%26L%260%26Embraer%20ERJ-145%20Jet%26OKC%26Oklahoma%20City,%20OK%261529%2620001123%26Thursday%26%26%26ER4%26Y%3a1%26DFW%26Dallas%2fFt%20Worth,%20TX%2620001126%260918%26AA%26American%20Airlines%262451%26V%260%26McDonnell%20Douglas%20SP80%20Jet%26SJC%26San%20Jose,%20CA%261057%2620001126%26Sunday%26%26%26S80%26Y&hold_flag=N&SEQ=97122479938121310102000&LANG=EN&last_pgd_page=airpdisp.pgd") |
| 164 | andalso isSome (fromString |
| 165 | "large-int.html#SIG:INT_INF.\\|@LT\\|\\|@LT\\|:VAL:SPEC") |
| 166 | andalso |
| 167 | List.forall |
| 168 | ([("http://Norma140.emp3.com/cgibin/optin/remove.pl", |
| 169 | SOME Scheme.Http, NONE, "Norma140.emp3.com", NONE, |
| 170 | SOME (true, ["cgibin", "optin"], "remove.pl"), |
| 171 | NONE, NONE), |
| 172 | ("http://s7.sprintpcs.com/store/..\\store\\cc_Popup_aa.asp", |
| 173 | SOME Scheme.Http, NONE, "s7.sprintpcs.com", NONE, |
| 174 | SOME (true, ["store"], "..\\store\\cc_Popup_aa.asp"), |
| 175 | NONE, NONE), |
| 176 | ("http://www.sds.lcs.mit.edu/spd/larch/", |
| 177 | SOME Scheme.Http, NONE, "www.sds.lcs.mit.edu", NONE, |
| 178 | SOME (true, ["spd", "larch"], ""), |
| 179 | NONE, NONE), |
| 180 | ("http://foo.com/hello", |
| 181 | SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, [], "hello"), |
| 182 | NONE, NONE), |
| 183 | ("http://foo.com/hello/", |
| 184 | SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, ["hello"], ""), |
| 185 | NONE, NONE), |
| 186 | ("http://foo.com", |
| 187 | SOME Scheme.Http, NONE, "foo.com", NONE, NONE, NONE, NONE), |
| 188 | ("http://foo.com/", |
| 189 | SOME Scheme.Http, NONE, "foo.com", NONE, SOME (true, [], ""), NONE, NONE), |
| 190 | ("ftp://bar.com:80/yes/now", |
| 191 | SOME Scheme.Ftp, NONE, "bar.com", SOME 80, SOME (true, ["yes"], "now"), NONE, |
| 192 | NONE), |
| 193 | ("http://z.com/foo?site=http://w.com/~zzz", |
| 194 | SOME Scheme.Http, NONE, "z.com", NONE, SOME (true, [], "foo"), |
| 195 | SOME "site=http://w.com/~zzz", NONE), |
| 196 | ("http://sweeks@foo.com/yes?really#here", |
| 197 | SOME Scheme.Http, SOME "sweeks", "foo.com", NONE, SOME (true, [], "yes"), |
| 198 | SOME "really", SOME "here"), |
| 199 | ("http://a.com/foo?%79%65%73%2e", |
| 200 | SOME Scheme.Http, NONE, "a.com", NONE, SOME (true, [], "foo"), SOME "yes.", NONE), |
| 201 | ("http://foo.com/a?b%20c", |
| 202 | SOME Scheme.Http, NONE, "foo.com", NONE, |
| 203 | SOME (true, [], "a"), |
| 204 | SOME "b c", |
| 205 | NONE), |
| 206 | ("http://community.cnn.com/cgi-bin/WebX?13@236.FzKWcVUHjLB^0@.ee7bada", |
| 207 | SOME Scheme.Http, NONE, "community.cnn.com", NONE, |
| 208 | SOME (true, ["cgi-bin"], "WebX"), |
| 209 | SOME "13@236.FzKWcVUHjLB^0@.ee7bada", |
| 210 | NONE), |
| 211 | ("//foo.com/z", NONE, NONE, "foo.com", NONE, SOME (true, [], "z"), NONE, NONE), |
| 212 | ("http://ad.doubleclick.net/adj/N674.briefing.com/B22024;abr=!ie;sz=125x125;ord= [timestamp]?", |
| 213 | SOME Scheme.Http, NONE, "ad.doubleclick.net", NONE, |
| 214 | SOME (true, ["adj", "N674.briefing.com"], |
| 215 | "B22024;abr=!ie;sz=125x125;ord= [timestamp]"), |
| 216 | SOME "", |
| 217 | NONE), |
| 218 | ("http://tac.eecs.umich.edu/cgi-bin/botuser/ViewAccount?VIEW=INFO&VIEWALL= [bad label VIEWALL]", |
| 219 | SOME Scheme.Http, NONE, "tac.eecs.umich.edu", NONE, |
| 220 | SOME (true, ["cgi-bin", "botuser"], "ViewAccount"), |
| 221 | SOME "VIEW=INFO&VIEWALL= [bad label VIEWALL]", |
| 222 | NONE), |
| 223 | ("http://phase2media.doubleclick.net/adj/ag.aol.p2m.com/asconfirmcollections;kw=shortandpunchy;kw1=;kw2=;abr=!ie;pos=1;sz=125x125;tile=10;ord=964739477869?\"", |
| 224 | SOME Scheme.Http, NONE, "phase2media.doubleclick.net", NONE, |
| 225 | SOME (true, ["adj", "ag.aol.p2m.com"], "asconfirmcollections;kw=shortandpunchy;kw1=;kw2=;abr=!ie;pos=1;sz=125x125;tile=10;ord=964739477869"), |
| 226 | SOME "\"", |
| 227 | NONE) |
| 228 | ], |
| 229 | fn (s, scheme, user, host, port, path, query, fragment) => |
| 230 | valOf (fromString s) |
| 231 | = T {scheme = scheme, |
| 232 | authority = SOME {user = user, |
| 233 | host = host, |
| 234 | port = port}, |
| 235 | path = Option.map (path, fn (i, p, f) => {isAbsolute = i, |
| 236 | path = p, file = f}), |
| 237 | query = query, |
| 238 | fragment = fragment})) |
| 239 | |
| 240 | end |