1 (* Copyright (C
) 2009 Matthew Fluet
.
2 * Copyright (C
) 1999-2006 Henry Cejtin
, Matthew Fluet
, Suresh
3 * Jagannathan
, and Stephen Weeks
.
5 * MLton is released under a BSD
-style license
.
6 * See the file MLton
-LICENSE for details
.
12 val escapeQuery
= ref
true
20 fun fromHexChars (hi
: t
, lo
: t
) =
21 chr (toHexDigit hi
* radix
+ toHexDigit lo
)
23 fun toHexChars (c
: t
): t
* t
=
25 val (hi
, lo
) = Int.divMod (ord c
, radix
)
27 (fromHexDigit hi
, fromHexDigit lo
)
30 fun escapeHex (c
: t
): string =
32 val (hi
, lo
) = toHexChars c
34 implode
[#
"%", hi
, lo
]
39 fun unescape (s
: string): string =
41 fun sub i
= String.sub (s
, i
)
46 else let val c
= sub i
49 Char.fromHexChars (sub (i
+ 1), sub (i
+ 2)) :: cs
)
50 else loop (i
+ 1, c
:: cs
)
55 val shouldEscape
: char
-> bool =
58 orelse c
= Char.dquote
59 orelse String.contains ("?<>#% {}|\\^ []`", c
))
62 Trace
.trace ("Url.shouldEscape", Char.layout
, Bool.layout
) shouldEscape
90 String.memoizeList (fn _
=> Error
.bug
"Url.Scheme.fromString", map
)
92 val equals
: t
* t
-> bool = op =
95 #
1 (valOf (List.peek (map
, fn (_
, s
') => equals (s
, s
'))))
97 val layout
= Layout
.str
o toString
100 structure Authority
=
102 type t
= {user
: string option
,
106 fun layout ({user
, host
, port
}: t
) =
107 Layout
.record
[("user", Option
.layout
String.layout user
),
108 ("host", String.layout host
),
109 ("port", Option
.layout
Int.layout port
)]
111 fun canonicalize
{user
, host
, port
} =
112 {user
= Option
.map (user
, String.toLower
),
113 host
= String.toLower host
,
116 fun equals ({user
= u
, host
= h
, port
= p
}: t
,
117 {user
= u
', host
= h
', port
= p
'}: t
): bool =
118 Option
.equals (u
, u
', String.equals
)
119 andalso String.toLower h
= String.toLower h
'
120 andalso Option
.equals (p
, p
', Port
.equals
)
123 Trace
.trace2 ("Url.Authority.equals", layout
, layout
, Bool.layout
) equals
126 (* The numbers
in comments are rule numbers from Section
5.2 of RFC
2396. *)
127 (* canonicalizePath (p1
, p2
, f
)
128 * Assume p1 is already canonicalized
.
130 fun canonicalizePath (p1
: string list
, p2
: string list
, f
: string) =
136 "." => (rev ac
, "") (* 6d
*)
137 |
".." => (case ac
of
139 |
".." :: _
=> (rev ac
, "..")
140 | _
:: ac
=> (rev ac
, "")) (* 6f
*)
142 |
"" :: r
=> loop (r
, ac
)
143 |
"." :: r
=> loop (r
, ac
) (* 6c
*)
144 |
".." :: r
=> loop (r
,
147 |
".." :: _
=> ".." :: ac
148 | _
:: ac
=> ac
) (* 6e
*)
149 | s
:: r
=> loop (r
, s
:: ac
)
155 type t
= {file
: string,
160 fun make
f (p
: t
) = f p
162 val file
= make #file
163 val isAbsolute
= make #isAbsolute
164 val path
= make #path
167 val root
= {isAbsolute
= true,
171 fun canonicalize
{isAbsolute
= i
, path
= p
, file
= f
} =
172 let val (p
, f
) = canonicalizePath ([], p
, f
)
173 in {isAbsolute
= i
, path
= p
, file
= f
}
176 fun toString ({isAbsolute
, path
, file
}) =
177 concat
[if isAbsolute
then "/" else "",
178 escape (concat (List.separate (path @
[file
], "/")))]
180 val layout
= Layout
.str
o toString
184 T
of {authority
: Authority
.t option
,
185 fragment
: string option
,
187 query
: string option
,
188 scheme
: Scheme
.t option
} (* NONE
in relative urls
*)
189 | JavaScript
of string
192 | Opaque
of {scheme
: string,
195 fun addQuery (u
: t
, q
) =
197 T
{authority
, fragment
, path
, query
, scheme
}=>
199 then Error
.bug
"Url.addQuery"
201 T
{authority
= authority
,
206 | _
=> Error
.bug
"Url.addQuery"
208 fun host (u
: t
): string =
210 T
{authority
= SOME
{host
, ...}, ...} => host
211 | _
=> Error
.bug
"Url.host"
213 fun path (u
: t
): Path
.t
=
215 T
{path
= SOME p
, ...} => p
216 | _
=> Error
.bug
"Url.path"
225 T
{scheme
, authority
, path
, query
, fragment
} =>
226 concat
[mo (scheme
, fn s
=> concat
[Scheme
.toString s
, ":"]),
227 mo (authority
, fn {user
, host
, port
} =>
229 mo (user
, fn u
=> concat
[escape u
, "@"]),
231 mo (port
, fn p
=> concat
[":", Int.toString p
])]),
232 mo (path
, Path
.toString
),
233 mo (query
, fn q
=> concat
["?", if !escapeQuery
then escape q
235 mo (fragment
, fn f
=> concat
["#", escape f
])
237 | JavaScript s
=> concat
["javascript:", escape s
]
238 | MailTo email
=> concat
["mailto:", escape email
]
239 | News group
=> concat
["news:", escape group
]
240 | Opaque
{scheme
, rest
} => concat
[scheme
, ":", escape rest
]
242 val layout
= Layout
.str
o toString
245 Trace
.trace ("Url.toString", layout
, String.layout
) toString
248 fn T
{scheme
, authority
, path
, query
, fragment
} =>
249 Layout
.record
[("scheme", Option
.layout Scheme
.layout scheme
),
250 ("authority", Option
.layout Authority
.layout authority
),
251 ("path", Option
.layout Path
.layout path
),
252 ("query", Option
.layout
String.layout query
),
253 ("fragment", Option
.layout
String.layout fragment
)]
256 val equals
: t
* t
-> bool = op =
262 val digit
= isChar
Char.isDigit
263 val upalpha
= isChar
Char.isUpper
264 val lowalpha
= isChar
Char.isLower
265 val alpha
= isChar
Char.isAlpha
266 val alphanum
= isChar
Char.isAlphaNum
267 val hex
= isChar
Char.isHexDigit
268 val escaped
= seq
[char #
"%", hex
, hex
]
269 val mark
= oneOf
"-_.!~*' ()"
270 val unreserved
= or
[alphanum
, mark
]
271 val reserved
= oneOf
";/?:@&=+$,"
272 val printable
= isChar
Char.isPrint
273 (*val urlc
= or
[reserved
, unreserved
, escaped
]*)
274 (* It
's pointless to follow the spec on urlc
, which rules out lots
of
275 * printable characters
. Lot
's
of sites use printable characters outside
276 * the spec
, and browsers
handle them
, so we should too
.
279 val fragment
' = Save
.new ()
280 val fragment
= save (star urlc
, fragment
')
281 val query
' = Save
.new ()
282 (* The official definition
of query says urlc
*, but this doesn
't work
with
283 * our expanded meaning
of urlc
= printable
, since
then the query consumes
286 (* val query
= save (star urlc
, query
') *)
287 val query
= save (star (isChar (fn c
=> Char.isPrint c
290 val port
' = Save
.new ()
291 val port
= save (star digit
, port
')
292 val IPv4address
= seq
[oneOrMore digit
, char #
".",
293 oneOrMore digit
, char #
".",
294 oneOrMore digit
, char #
".",
296 val toplabel
= or
[alpha
,
297 seq
[alpha
, star (or
[alphanum
, char #
"-"]), alphanum
]]
298 val domainlabel
= or
[alphanum
,
300 star (or
[alphanum
, char #
"-"]),
302 val hostname
= seq
[star (seq
[domainlabel
, char #
"."]),
304 optional (char #
".")]
305 val host
' = Save
.new ()
306 val host
= save (or
[hostname
, IPv4address
], host
')
307 val hostport
= seq
[host
, optional (seq
[char #
":", port
])]
308 val userinfo
' = Save
.new ()
310 save (star (or
[unreserved
, escaped
, oneOf
";:&=+$"]), userinfo
')
311 val server
= optional (seq
[optional (seq
[userinfo
, char #
"@"]),
313 val regName
' = Save
.new ()
315 save (oneOrMore (or
[unreserved
,
319 val authority
= or
[server
, regName
]
320 val scheme
' = Save
.new ()
322 save (seq
[alpha
, star (or
[alpha
, digit
, oneOf
"+-."])], scheme
')
323 val relSegment
' = Save
.new ()
325 save (oneOrMore (or
[unreserved
, escaped
, oneOf
";@&=+$,"]),
327 (* val pchar
= or
[unreserved
, escaped
, oneOf
":@&=+$,", wrong
] *)
328 (* val param
= star pchar
*)
329 (* val segment
= seq
[star pchar
, star (seq
[char #
";", param
])] *)
330 (* val pathSegments
= seq
[segment
, star (seq
[char #
"/", segment
])] *)
331 val pathSegments
' = Save
.new ()
333 save (star (isChar (fn c
=> (Char.isPrint c
andalso
334 not (String.contains ("?#", c
))))),
336 val absPath
= seq
[char #
"/", pathSegments
]
337 val relPath
= seq
[relSegment
, optional absPath
]
338 val netPath
= seq
[string "//", authority
, optional absPath
]
339 val urlcNoSlash
= or
[unreserved
, escaped
, oneOf
";?:@&=+$,"]
340 val opaquePart
' = Save
.new ()
341 val opaquePart
= save (seq
[urlcNoSlash
, star urlc
], opaquePart
')
342 val hierPart
= seq
[or
[netPath
, absPath
],
343 optional (seq
[char #
"?", query
])]
344 (* netPath occurs
before absPath
in the following regexp because
345 * you want urls like
//foo
.com
/z to be a netPath
with host foo
.com
and
346 * not
as an absPath
. Fortunately
, the regexp library returns the
347 * first matching choice
in an or
.
350 seq
[or
[netPath
, absPath
, relPath
,
351 null (* null added for empty urls
-- these are
352 * not
in RFC
2396 as far
as I can tell
, but
353 * some
of their examples use them
.
356 optional (seq
[char #
"?", query
])]
357 val absoluteUrl
= seq
[scheme
, char #
":", or
[hierPart
, opaquePart
]]
358 val url
= seq
[optional (or
[absoluteUrl
, relativeUrl
]),
359 optional (seq
[char #
"#", fragment
])]
360 val url
= Promise
.lazy (fn () => compileDFA url
)
362 fun peekQuery (m
: Match
.t
): string option
=
363 Option
.map (Match
.peek (m
, query
'), fn ss
=>
365 val s
= Substring
.toString ss
372 fun getAbsPath (m
: Match
.t
): Path
.t
=
373 case Match
.peek (m
, pathSegments
') of
374 NONE
=> Error
.bug
"Url.Regexp.getAbsPath"
377 val s
= Substring
.toString ss
380 (String.fields (unescape s
, fn c
=> c
= #
"/"))
381 in {isAbsolute
= true, path
= p
, file
= f
}
385 fun getMatch (m
: Regexp
.Match
.t
): t
=
387 val {peek
, lookup
, exists
, ...} = Match
.stringFuns m
388 in if exists opaquePart
'
391 val scheme
= String.toLower (lookup scheme
')
392 val rest
= unescape (lookup opaquePart
')
394 "javascript" => JavaScript rest
395 |
"mailto" => MailTo rest
396 |
"news" => News rest
397 | _
=> Opaque
{scheme
= scheme
, rest
= rest
}
404 SOME
{user
= Option
.map (peek userinfo
', unescape
),
406 port
= Option
.map (peek port
',
407 valOf
o Int.fromString
)}
409 fun split ss
= String.fields (unescape ss
, fn c
=> c
= #
"/")
411 case (Option
.map (peek relSegment
', unescape
),
412 Option
.map (peek pathSegments
', split
)) of
414 |
(SOME file
, NONE
) => SOME
{isAbsolute
= false,
418 let val (p
, f
) = List.splitLast ss
419 in SOME
{isAbsolute
= true,
422 |
(SOME s
, SOME ss
) =>
423 let val (p
, f
) = List.splitLast ss
424 in SOME
{isAbsolute
= false,
425 path
= s
:: p
, file
= f
}
427 in T
{scheme
= Option
.map (peek scheme
', Scheme
.fromString
),
428 authority
= authority
,
431 fragment
= Option
.map (peek fragment
', unescape
)}
435 fun fromString (urlString
: string): t option
=
436 Option
.map (Regexp
.Compiled
.matchAll (Regexp
.url(), urlString
), getMatch
)
439 Trace
.trace ("Url.fromString", String.layout
, Option
.layout layout
)
442 fun equals (u
: t
, u
': t
): bool = u
= u
'
447 (* ------------------------------------------------- *)
449 (* ------------------------------------------------- *)
451 fun relativize
{base
= b
, relative
= r
} =
453 (T
{scheme
= SOME s
, authority
= SOME a
, path
= p
, ...},
454 T
{scheme
= SOME s
', authority
= SOME a
', path
= p
', query
= q
',
456 if Scheme
.equals (s
, s
')
457 andalso Authority
.equals (a
, a
')
463 ([], "") => ([], ".")
465 in SOME
{isAbsolute
= false, path
= p
, file
= f
}
467 val p
': Path
.t option
=
470 |
(NONE
, SOME
{path
, file
, ...}) => some (path
, file
)
471 |
(SOME
{path
, ...}, NONE
) =>
472 some (List.map (path
, fn _
=> ".."), "")
473 |
(SOME
{path
= p
, ...}, SOME
{path
= p
', file
, ...}) =>
476 List.removeCommonPrefix (p
, p
', String.equals
)
478 in some (List.map (p
, fn _
=> "..") @ p
', file
)
480 in SOME (T
{scheme
= NONE
, authority
= NONE
, path
= p
', query
= q
',
487 Trace
.trace ("Url.relativize",
488 fn {base
= b
, relative
= r
} => Layout
.tuple
[layout b
, layout r
],
489 Option
.layout layout
)
492 (* ------------------------------------------------- *)
494 (* ------------------------------------------------- *)
496 (* The numbers
in comments are rule numbers from Section
5.2 of RFC
2396. *)
497 fun resolve
{base
, relative
} =
498 case (base
, relative
) of
499 (_
, T
{scheme
= SOME _
, ...}) => relative (* 3 *)
500 |
(T
{scheme
= s
, authority
= a
, path
= p
, query
= q
, ...},
501 T
{authority
= a
', path
= p
', query
= q
', fragment
= f
', ...}) =>
505 (SOME _
, _
, _
) => (a
', p
', q
') (* 4 *)
506 |
(_
, NONE
, NONE
) => (a
, p
, q
) (* 2 *)
507 |
(_
, NONE
, SOME _
) => (* 6 *)
510 Option
.map (p
, fn {isAbsolute
, path
, file
} =>
511 {isAbsolute
= isAbsolute
,
516 |
(_
, SOME
{isAbsolute
= true, ...}, _
) => (a
, p
', q
') (* 5 *)
517 |
(_
, SOME
{isAbsolute
= false, path
= p
', file
= f
'}, _
) => (* 6 *)
522 | SOME
{path
, ...} => canonicalizePath (path
, p
', f
')
523 in (a
, SOME
{isAbsolute
= true, path
= p
', file
= f
'}, q
')
525 in T
{scheme
= s
, authority
= a
, path
= p
, query
= q
, fragment
= f
'}
532 fn {base
= b
, relative
= r
} => Layout
.tuple
[layout b
, layout r
],
536 (* ------------------------------------------------- *)
538 (* ------------------------------------------------- *)
540 fun canonicalize (u
: t
): t
=
542 T
{scheme
, authority
, path
, query
, fragment
} =>
544 authority
= Option
.map (authority
, Authority
.canonicalize
),
545 path
= Option
.map (path
, Path
.canonicalize
),