read-response-body always returns bytevector or #f
[bpt/guile.git] / module / web / client.scm
CommitLineData
680c8c5a
AW
1;;; Web client
2
2663411b 3;; Copyright (C) 2011, 2012 Free Software Foundation, Inc.
680c8c5a
AW
4
5;; This library is free software; you can redistribute it and/or
6;; modify it under the terms of the GNU Lesser General Public
7;; License as published by the Free Software Foundation; either
8;; version 3 of the License, or (at your option) any later version.
9;;
10;; This library is distributed in the hope that it will be useful,
11;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13;; Lesser General Public License for more details.
14;;
15;; You should have received a copy of the GNU Lesser General Public
16;; License along with this library; if not, write to the Free Software
17;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18;; 02110-1301 USA
19
20;;; Commentary:
21;;;
22;;; (web client) is a simple HTTP URL fetcher for Guile.
23;;;
24;;; In its current incarnation, (web client) is synchronous. If you
25;;; want to fetch a number of URLs at once, probably the best thing to
26;;; do is to write an event-driven URL fetcher, similar in structure to
27;;; the web server.
28;;;
29;;; Another option, good but not as performant, would be to use threads,
fe0c202c 30;;; possibly via a thread pool.
680c8c5a
AW
31;;;
32;;; Code:
33
34(define-module (web client)
35 #:use-module (rnrs bytevectors)
36 #:use-module (ice-9 binary-ports)
37 #:use-module (ice-9 rdelim)
38 #:use-module (web request)
39 #:use-module (web response)
40 #:use-module (web uri)
b9d72498 41 #:use-module (srfi srfi-1)
680c8c5a 42 #:export (open-socket-for-uri
91e693a8
LC
43 http-get
44 http-get*))
680c8c5a
AW
45
46(define (open-socket-for-uri uri)
2663411b
LC
47 "Return an open input/output port for a connection to URI."
48 (define addresses
d74fcce9 49 (let ((port (uri-port uri)))
b9d72498
LC
50 (delete-duplicates
51 (getaddrinfo (uri-host uri)
52 (cond (port => number->string)
53 (else (symbol->string (uri-scheme uri))))
54 (if port
55 AI_NUMERICSERV
56 0))
57 (lambda (ai1 ai2)
58 (equal? (addrinfo:addr ai1) (addrinfo:addr ai2))))))
2663411b
LC
59
60 (let loop ((addresses addresses))
61 (let* ((ai (car addresses))
b9d72498
LC
62 (s (with-fluids ((%default-port-encoding #f))
63 ;; Restrict ourselves to TCP.
64 (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP))))
2663411b
LC
65 (catch 'system-error
66 (lambda ()
67 (connect s (addrinfo:addr ai))
68
69 ;; Buffer input and output on this port.
70 (setvbuf s _IOFBF)
71 ;; Enlarge the receive buffer.
72 (setsockopt s SOL_SOCKET SO_RCVBUF (* 12 1024))
73 s)
74 (lambda args
75 ;; Connection failed, so try one of the other addresses.
f865ffaa 76 (close s)
b9d72498 77 (if (null? (cdr addresses))
2663411b 78 (apply throw args)
f865ffaa 79 (loop (cdr addresses))))))))
680c8c5a
AW
80
81(define (decode-string bv encoding)
82 (if (string-ci=? encoding "utf-8")
83 (utf8->string bv)
84 (let ((p (open-bytevector-input-port bv)))
85 (set-port-encoding! p encoding)
86 (let ((res (read-delimited "" p)))
87 (close-port p)
88 res))))
89
680c8c5a
AW
90;; Logically the inverse of (web server)'s `sanitize-response'.
91;;
92(define (decode-response-body response body)
93 ;; `body' is either #f or a bytevector.
94 (cond
95 ((not body) body)
96 ((bytevector? body)
97 (let ((rlen (response-content-length response))
98 (blen (bytevector-length body)))
99 (cond
100 ((and rlen (not (= rlen blen)))
101 (error "bad content-length" rlen blen))
102 ((response-content-type response)
103 => (lambda (type)
104 (cond
ee2d8741 105 ((text-content-type? (car type))
680c8c5a
AW
106 (decode-string body (or (assq-ref (cdr type) 'charset)
107 "iso-8859-1")))
108 (else body))))
109 (else body))))
110 (else
111 (error "unexpected body type" body))))
112
113(define* (http-get uri #:key (port (open-socket-for-uri uri))
114 (version '(1 . 1)) (keep-alive? #f) (extra-headers '())
115 (decode-body? #t))
06883ae0
DH
116 "Connect to the server corresponding to URI and ask for the
117resource, using the ‘GET’ method. If you already have a port open,
118pass it as PORT. The port will be closed at the end of the
119request unless KEEP-ALIVE? is true. Any extra headers in the
120alist EXTRA-HEADERS will be added to the request.
121
122If DECODE-BODY? is true, as is the default, the body of the
123response will be decoded to string, if it is a textual content-type.
124Otherwise it will be returned as a bytevector."
680c8c5a
AW
125 (let ((req (build-request uri #:version version
126 #:headers (if keep-alive?
127 extra-headers
128 (cons '(connection close)
129 extra-headers)))))
130 (write-request req port)
131 (force-output port)
680c8c5a
AW
132 (let* ((res (read-response port))
133 (body (read-response-body res)))
134 (if (not keep-alive?)
135 (close-port port))
136 (values res
137 (if decode-body?
138 (decode-response-body res body)
139 body)))))
91e693a8
LC
140
141(define* (http-get* uri #:key (port (open-socket-for-uri uri))
142 (version '(1 . 1)) (keep-alive? #f) (extra-headers '())
143 (decode-body? #t))
144 "Like ‘http-get’, but return an input port from which to read. When
145DECODE-BODY? is true, as is the default, the returned port has its
146encoding set appropriately if the data at URI is textual. Closing the
147returned port closes PORT, unless KEEP-ALIVE? is true."
148 (let ((req (build-request uri #:version version
149 #:headers (if keep-alive?
150 extra-headers
151 (cons '(connection close)
152 extra-headers)))))
153 (write-request req port)
154 (force-output port)
155 (unless keep-alive?
156 (shutdown port 1))
157 (let* ((res (read-response port))
158 (body (response-body-port res
159 #:keep-alive? keep-alive?
160 #:decode? decode-body?)))
161 (values res body))))