;;;; (web uri) --- URI manipulation tools
;;;;
-;;;; Copyright (C) 1997,2001,2002,2010,2011,2012 Free Software Foundation, Inc.
+;;;; Copyright (C) 1997,2001,2002,2010,2011,2012,2013 Free Software Foundation, Inc.
;;;;
;;;; This library is free software; you can redistribute it and/or
;;;; modify it under the terms of the GNU Lesser General Public
;;;; License as published by the Free Software Foundation; either
;;;; version 3 of the License, or (at your option) any later version.
-;;;;
+;;;;
;;;; This library is distributed in the hope that it will be useful,
;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;;;; Lesser General Public License for more details.
-;;;;
+;;;;
;;;; You should have received a copy of the GNU Lesser General Public
;;;; License along with this library; if not, write to the Free Software
;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;;; Commentary:
;; A data type for Universal Resource Identifiers, as defined in RFC
-;; 3986.
+;; 3986.
;;; Code:
(query uri-query)
(fragment uri-fragment))
+(define (absolute-uri? obj)
+ (and (uri? obj) (uri-scheme obj) #t))
+
(define (uri-error message . args)
(throw 'uri-error message args))
(define* (build-uri scheme #:key userinfo host port (path "") query fragment
(validate? #t))
- "Construct a URI object. If @var{validate?} is true, also run some
-consistency checks to make sure that the constructed URI is valid."
+ "Construct a URI object. SCHEME should be a symbol, PORT
+either a positive, exact integer or ‘#f’, and the rest of the
+fields are either strings or ‘#f’. If VALIDATE? is true,
+also run some consistency checks to make sure that the constructed URI
+is valid."
(if validate?
(validate-uri scheme userinfo host port path query fragment))
(make-uri scheme userinfo host port path query fragment))
(define fragment-pat
".*")
(define uri-pat
- (format #f "^(~a):(//~a)?(~a)(\\?(~a))?(#(~a))?$"
+ (format #f "^((~a):)?(//~a)?(~a)(\\?(~a))?(#(~a))?$"
scheme-pat authority-pat path-pat query-pat fragment-pat))
(define uri-regexp
(make-regexp uri-pat))
-(define (string->uri string)
- "Parse @var{string} into a URI object. Returns @code{#f} if the string
+(define (string->uri* string)
+ "Parse STRING into a URI object. Return ‘#f’ if the string
could not be parsed."
(% (let ((m (regexp-exec uri-regexp string)))
(if (not m) (abort))
- (let ((scheme (string->symbol
- (string-downcase (match:substring m 1))))
- (authority (match:substring m 2))
- (path (match:substring m 3))
- (query (match:substring m 5))
+ (let ((scheme (let ((str (match:substring m 2)))
+ (and str (string->symbol (string-downcase str)))))
+ (authority (match:substring m 3))
+ (path (match:substring m 4))
+ (query (match:substring m 6))
(fragment (match:substring m 7)))
(call-with-values
(lambda ()
(lambda (k)
#f)))
+(define (string->uri string)
+ "Parse STRING into a URI object. Return ‘#f’ if the string
+could not be parsed."
+ (let ((uri (string->uri* string)))
+ (and uri (uri-scheme uri) uri)))
+
(define *default-ports* (make-hash-table))
(define (declare-default-port! scheme port)
- "Declare a default port for the given URI scheme.
-
-Default ports are for printing URI objects: a default port is not
-printed."
+ "Declare a default port for the given URI scheme."
(hashq-set! *default-ports* scheme port))
(define (default-port? scheme port)
(declare-default-port! 'https 443)
(define (uri->string uri)
- "Serialize @var{uri} to a string."
- (let* ((scheme-str (string-append
- (symbol->string (uri-scheme uri)) ":"))
+ "Serialize URI to a string. If the URI has a port that is the
+default port for its scheme, the port is not included in the
+serialization."
+ (let* ((scheme (uri-scheme uri))
(userinfo (uri-userinfo uri))
(host (uri-host uri))
(port (uri-port uri))
(query (uri-query uri))
(fragment (uri-fragment uri)))
(string-append
- scheme-str
+ (if scheme
+ (string-append (symbol->string scheme) ":")
+ "")
(if host
(string-append "//"
(if userinfo (string-append userinfo "@")
(utf8->string bv)
(let ((p (open-bytevector-input-port bv)))
(set-port-encoding! p encoding)
- (let ((res (read-delimited "" p)))
+ (let ((res (read-string p)))
(close-port p)
res))))
;; characters in other character sets.
;;
-;; Return a new string made from uri-decoding @var{str}. Specifically,
-;; turn @code{+} into space, and hex-encoded @code{%XX} strings into
+;; Return a new string made from uri-decoding STR. Specifically,
+;; turn ‘+’ into space, and hex-encoded ‘%XX’ strings into
;; their eight-bit characters.
;;
(define hex-chars
(string->char-set "0123456789abcdefABCDEF"))
(define* (uri-decode str #:key (encoding "utf-8"))
- "Percent-decode the given @var{str}, according to @var{encoding}.
+ "Percent-decode the given STR, according to ENCODING,
+which should be the name of a character encoding.
Note that this function should not generally be applied to a full URI
-string. For paths, use split-and-decode-uri-path instead. For query
-strings, split the query on @code{&} and @code{=} boundaries, and decode
+string. For paths, use ‘split-and-decode-uri-path’ instead. For query
+strings, split the query on ‘&’ and ‘=’ boundaries, and decode
the components separately.
-Note that percent-encoded strings encode @emph{bytes}, not characters.
+Note also that percent-encoded strings encode _bytes_, not characters.
There is no guarantee that a given byte sequence is a valid string
encoding. Therefore this routine may signal an error if the decoded
-bytes are not valid for the given encoding. Pass @code{#f} for
-@var{encoding} if you want decoded bytes as a bytevector directly."
+bytes are not valid for the given encoding. Pass ‘#f’ for ENCODING if
+you want decoded bytes as a bytevector directly. ‘set-port-encoding!’,
+for more information on character encodings.
+
+Returns a string of the decoded characters, or a bytevector if
+ENCODING was ‘#f’."
(let* ((len (string-length str))
(bv
(call-with-output-bytevector*
(char-set-union ascii-alnum-chars
(string->char-set "-._~")))
-;; Return a new string made from uri-encoding @var{str}, unconditionally
-;; transforming any characters not in @var{unescaped-chars}.
+;; Return a new string made from uri-encoding STR, unconditionally
+;; transforming any characters not in UNESCAPED-CHARS.
;;
(define* (uri-encode str #:key (encoding "utf-8")
(unescaped-chars unreserved-chars))
- "Percent-encode any character not in the character set, @var{unescaped-chars}.
-
-Percent-encoding first writes out the given character to a bytevector
-within the given @var{encoding}, then encodes each byte as
-@code{%@var{HH}}, where @var{HH} is the hexadecimal representation of
-the byte."
- (if (string-index str unescaped-chars)
+ "Percent-encode any character not in the character set,
+UNESCAPED-CHARS.
+
+The default character set includes alphanumerics from ASCII, as well as
+the special characters ‘-’, ‘.’, ‘_’, and ‘~’. Any other character will
+be percent-encoded, by writing out the character to a bytevector within
+the given ENCODING, then encoding each byte as ‘%HH’, where HH is the
+uppercase hexadecimal representation of the byte."
+ (define (needs-escaped? ch)
+ (not (char-set-contains? unescaped-chars ch)))
+ (if (string-index str needs-escaped?)
(call-with-output-string*
(lambda (port)
(string-for-each
(if (< i len)
(let ((byte (bytevector-u8-ref bv i)))
(display #\% port)
- (display (number->string byte 16) port)
+ (when (< byte 16)
+ (display #\0 port))
+ (display (string-upcase (number->string byte 16))
+ port)
(lp (1+ i))))))))
str)))
str))
(define (split-and-decode-uri-path path)
- "Split @var{path} into its components, and decode each
-component, removing empty components.
+ "Split PATH into its components, and decode each component,
+removing empty components.
-For example, @code{\"/foo/bar/\"} decodes to the two-element list,
-@code{(\"foo\" \"bar\")}."
+For example, ‘\"/foo/bar%20baz/\"’ decodes to the two-element list,
+‘(\"foo\" \"bar baz\")’."
(filter (lambda (x) (not (string-null? x)))
(map uri-decode (string-split path #\/))))
(define (encode-and-join-uri-path parts)
- "URI-encode each element of @var{parts}, which should be a list of
-strings, and join the parts together with @code{/} as a delimiter."
+ "URI-encode each element of PARTS, which should be a list of
+strings, and join the parts together with ‘/’ as a delimiter.
+
+For example, the list ‘(\"scrambled eggs\" \"biscuits&gravy\")’
+encodes as ‘\"scrambled%20eggs/biscuits%26gravy\"’."
(string-join (map uri-encode parts) "/"))