http-get: don't shutdown write end of socket
[bpt/guile.git] / module / web / client.scm
1 ;;; Web client
2
3 ;; Copyright (C) 2011, 2012 Free Software Foundation, Inc.
4
5 ;; This library is free software; you can redistribute it and/or
6 ;; modify it under the terms of the GNU Lesser General Public
7 ;; License as published by the Free Software Foundation; either
8 ;; version 3 of the License, or (at your option) any later version.
9 ;;
10 ;; This library is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; Lesser General Public License for more details.
14 ;;
15 ;; You should have received a copy of the GNU Lesser General Public
16 ;; License along with this library; if not, write to the Free Software
17 ;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 ;; 02110-1301 USA
19
20 ;;; Commentary:
21 ;;;
22 ;;; (web client) is a simple HTTP URL fetcher for Guile.
23 ;;;
24 ;;; In its current incarnation, (web client) is synchronous. If you
25 ;;; want to fetch a number of URLs at once, probably the best thing to
26 ;;; do is to write an event-driven URL fetcher, similar in structure to
27 ;;; the web server.
28 ;;;
29 ;;; Another option, good but not as performant, would be to use threads,
30 ;;; possibly via a thread pool.
31 ;;;
32 ;;; Code:
33
34 (define-module (web client)
35 #:use-module (rnrs bytevectors)
36 #:use-module (ice-9 binary-ports)
37 #:use-module (ice-9 rdelim)
38 #:use-module (web request)
39 #:use-module (web response)
40 #:use-module (web uri)
41 #:use-module (srfi srfi-1)
42 #:export (open-socket-for-uri
43 http-get
44 http-get*))
45
46 (define (open-socket-for-uri uri)
47 "Return an open input/output port for a connection to URI."
48 (define addresses
49 (let ((port (uri-port uri)))
50 (delete-duplicates
51 (getaddrinfo (uri-host uri)
52 (cond (port => number->string)
53 (else (symbol->string (uri-scheme uri))))
54 (if port
55 AI_NUMERICSERV
56 0))
57 (lambda (ai1 ai2)
58 (equal? (addrinfo:addr ai1) (addrinfo:addr ai2))))))
59
60 (let loop ((addresses addresses))
61 (let* ((ai (car addresses))
62 (s (with-fluids ((%default-port-encoding #f))
63 ;; Restrict ourselves to TCP.
64 (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP))))
65 (catch 'system-error
66 (lambda ()
67 (connect s (addrinfo:addr ai))
68
69 ;; Buffer input and output on this port.
70 (setvbuf s _IOFBF)
71 ;; Enlarge the receive buffer.
72 (setsockopt s SOL_SOCKET SO_RCVBUF (* 12 1024))
73 s)
74 (lambda args
75 ;; Connection failed, so try one of the other addresses.
76 (close s)
77 (if (null? (cdr addresses))
78 (apply throw args)
79 (loop (cdr addresses))))))))
80
81 (define (decode-string bv encoding)
82 (if (string-ci=? encoding "utf-8")
83 (utf8->string bv)
84 (let ((p (open-bytevector-input-port bv)))
85 (set-port-encoding! p encoding)
86 (let ((res (read-delimited "" p)))
87 (close-port p)
88 res))))
89
90 ;; Logically the inverse of (web server)'s `sanitize-response'.
91 ;;
92 (define (decode-response-body response body)
93 ;; `body' is either #f or a bytevector.
94 (cond
95 ((not body) body)
96 ((bytevector? body)
97 (let ((rlen (response-content-length response))
98 (blen (bytevector-length body)))
99 (cond
100 ((and rlen (not (= rlen blen)))
101 (error "bad content-length" rlen blen))
102 ((response-content-type response)
103 => (lambda (type)
104 (cond
105 ((text-content-type? (car type))
106 (decode-string body (or (assq-ref (cdr type) 'charset)
107 "iso-8859-1")))
108 (else body))))
109 (else body))))
110 (else
111 (error "unexpected body type" body))))
112
113 (define* (http-get uri #:key (port (open-socket-for-uri uri))
114 (version '(1 . 1)) (keep-alive? #f) (extra-headers '())
115 (decode-body? #t))
116 "Connect to the server corresponding to URI and ask for the
117 resource, using the ‘GET’ method. If you already have a port open,
118 pass it as PORT. The port will be closed at the end of the
119 request unless KEEP-ALIVE? is true. Any extra headers in the
120 alist EXTRA-HEADERS will be added to the request.
121
122 If DECODE-BODY? is true, as is the default, the body of the
123 response will be decoded to string, if it is a textual content-type.
124 Otherwise it will be returned as a bytevector."
125 (let ((req (build-request uri #:version version
126 #:headers (if keep-alive?
127 extra-headers
128 (cons '(connection close)
129 extra-headers)))))
130 (write-request req port)
131 (force-output port)
132 (let* ((res (read-response port))
133 (body (read-response-body res)))
134 (if (not keep-alive?)
135 (close-port port))
136 (values res
137 (if decode-body?
138 (decode-response-body res body)
139 body)))))
140
141 (define* (http-get* uri #:key (port (open-socket-for-uri uri))
142 (version '(1 . 1)) (keep-alive? #f) (extra-headers '())
143 (decode-body? #t))
144 "Like ‘http-get’, but return an input port from which to read. When
145 DECODE-BODY? is true, as is the default, the returned port has its
146 encoding set appropriately if the data at URI is textual. Closing the
147 returned port closes PORT, unless KEEP-ALIVE? is true."
148 (let ((req (build-request uri #:version version
149 #:headers (if keep-alive?
150 extra-headers
151 (cons '(connection close)
152 extra-headers)))))
153 (write-request req port)
154 (force-output port)
155 (unless keep-alive?
156 (shutdown port 1))
157 (let* ((res (read-response port))
158 (body (response-body-port res
159 #:keep-alive? keep-alive?
160 #:decode? decode-body?)))
161 (values res body))))