more robustness in http-read
[bpt/guile.git] / module / web / request.scm
1 ;;; HTTP request objects
2
3 ;; Copyright (C) 2010 Free Software Foundation, Inc.
4
5 ;; This library is free software; you can redistribute it and/or
6 ;; modify it under the terms of the GNU Lesser General Public
7 ;; License as published by the Free Software Foundation; either
8 ;; version 3 of the License, or (at your option) any later version.
9 ;;
10 ;; This library is distributed in the hope that it will be useful,
11 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
12 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ;; Lesser General Public License for more details.
14 ;;
15 ;; You should have received a copy of the GNU Lesser General Public
16 ;; License along with this library; if not, write to the Free Software
17 ;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
18 ;; 02110-1301 USA
19
20 ;;; Code:
21
22 (define-module (web request)
23 #:use-module (rnrs bytevectors)
24 #:use-module (rnrs io ports)
25 #:use-module (ice-9 rdelim)
26 #:use-module (srfi srfi-9)
27 #:use-module (web uri)
28 #:use-module (web http)
29 #:export (request?
30 request-method
31 request-uri
32 request-version
33 request-headers
34 request-meta
35 request-port
36
37 read-request
38 build-request
39 write-request
40
41 read-request-body/latin-1
42 write-request-body/latin-1
43
44 read-request-body/bytevector
45 write-request-body/bytevector
46
47 ;; General headers
48 ;;
49 request-cache-control
50 request-connection
51 request-date
52 request-pragma
53 request-trailer
54 request-transfer-encoding
55 request-upgrade
56 request-via
57 request-warning
58
59 ;; Entity headers
60 ;;
61 request-allow
62 request-content-encoding
63 request-content-language
64 request-content-length
65 request-content-location
66 request-content-md5
67 request-content-range
68 request-content-type
69 request-expires
70 request-last-modified
71
72 ;; Request headers
73 ;;
74 request-accept
75 request-accept-charset
76 request-accept-encoding
77 request-accept-language
78 request-authorization
79 request-expect
80 request-from
81 request-host
82 request-if-match
83 request-if-modified-since
84 request-if-none-match
85 request-if-range
86 request-if-unmodified-since
87 request-max-forwards
88 request-proxy-authorization
89 request-range
90 request-referer
91 request-te
92 request-user-agent
93
94 ;; Misc
95 request-absolute-uri))
96
97
98 ;;; {Character Encodings, Strings, and Bytevectors}
99 ;;;
100 ;;; Requests are read from over the wire, and as such have to be treated
101 ;;; very carefully.
102 ;;;
103 ;;; The header portion of the message is defined to be in a subset of
104 ;;; ASCII, and may be processed either byte-wise (using bytevectors and
105 ;;; binary I/O) or as characters in a single-byte ASCII-compatible
106 ;;; encoding.
107 ;;;
108 ;;; We choose the latter, processing as strings in the latin-1
109 ;;; encoding. This allows us to use all the read-delimited machinery,
110 ;;; character sets, and regular expressions, shared substrings, etc.
111 ;;;
112 ;;; The characters in the header values may themselves encode other
113 ;;; bytes or characters -- basically each header has its own parser. We
114 ;;; leave that as a header-specific topic.
115 ;;;
116 ;;; The body is present if the content-length header is present. Its
117 ;;; format and, if textual, encoding is determined by the headers, but
118 ;;; its length is encoded in bytes. So we just slurp that number of
119 ;;; characters in latin-1, knowing that the number of characters
120 ;;; corresponds to the number of bytes, and then convert to a
121 ;;; bytevector, perhaps for later decoding.
122 ;;;
123
124 (define-record-type <request>
125 (make-request method uri version headers meta port)
126 request?
127 (method request-method)
128 (uri request-uri)
129 (version request-version)
130 (headers request-headers)
131 (meta request-meta)
132 (port request-port))
133
134 (define (bad-request message . args)
135 (throw 'bad-request message args))
136
137 (define (non-negative-integer? n)
138 (and (number? n) (>= n 0) (exact? n) (integer? n)))
139
140 (define (validate-headers headers)
141 (if (pair? headers)
142 (let ((h (car headers)))
143 (if (pair? h)
144 (let ((k (car h)) (v (cdr h)))
145 (if (symbol? k)
146 (if (not (valid-header? k v))
147 (bad-request "Bad value for header ~a: ~s" k v))
148 (if (not (and (string? k) (string? v)))
149 (bad-request "Unknown header not a pair of strings: ~s"
150 h)))
151 (validate-headers (cdr headers)))
152 (bad-request "Header not a pair: ~a" h)))
153 (if (not (null? headers))
154 (bad-request "Headers not a list: ~a" headers))))
155
156 (define* (build-request #:key (method 'GET) uri (version '(1 . 1))
157 (headers '()) port (meta '())
158 (validate-headers? #t))
159 (cond
160 ((not (and (pair? version)
161 (non-negative-integer? (car version))
162 (non-negative-integer? (cdr version))))
163 (bad-request "Bad version: ~a" version))
164 ((not (uri? uri))
165 (bad-request "Bad uri: ~a" uri))
166 ((and (not port) (memq method '(POST PUT)))
167 (bad-request "Missing port for message ~a" method))
168 ((not (list? meta))
169 (bad-request "Bad metadata alist" meta))
170 (else
171 (if validate-headers?
172 (validate-headers headers))))
173 (make-request method uri version headers meta port))
174
175 (define* (read-request port #:optional (meta '()))
176 (set-port-encoding! port "ISO-8859-1")
177 (call-with-values (lambda () (read-request-line port))
178 (lambda (method uri version)
179 (make-request method uri version (read-headers port) meta port))))
180
181 ;; FIXME: really return a new request?
182 (define (write-request r port)
183 (write-request-line (request-method r) (request-uri r)
184 (request-version r) port)
185 (write-headers (request-headers r) port)
186 (display "\r\n" port)
187 (if (eq? port (request-port r))
188 r
189 (make-request (request-method r) (request-uri r) (request-version r)
190 (request-headers r) (request-meta r) port)))
191
192 ;; Probably not what you want to use "in production". Relies on one byte
193 ;; per char because we are in latin-1 encoding.
194 ;;
195 (define (read-request-body/latin-1 r)
196 (let ((nbytes (request-content-length r)))
197 (and nbytes
198 (let* ((buf (make-string nbytes))
199 (n (read-delimited! "" buf (request-port r))))
200 (if (= n nbytes)
201 buf
202 (bad-request "EOF while reading request body: ~a bytes of ~a"
203 n nbytes))))))
204
205 ;; Likewise, assumes that body can be written in the latin-1 encoding,
206 ;; and that the latin-1 encoding is what is expected by the server.
207 ;;
208 (define (write-request-body/latin-1 r body)
209 (display body (request-port r)))
210
211 (define (read-request-body/bytevector r)
212 (let ((nbytes (request-content-length r)))
213 (and nbytes
214 (let ((bv (get-bytevector-n (request-port r) nbytes)))
215 (if (= (bytevector-length bv) nbytes)
216 bv
217 (bad-request "EOF while reading request body: ~a bytes of ~a"
218 (bytevector-length bv) nbytes))))))
219
220 (define (write-request-body/bytevector r bv)
221 (put-bytevector (request-port r) bv))
222
223 (define-syntax define-request-accessor
224 (lambda (x)
225 (syntax-case x ()
226 ((_ field)
227 #'(define-request-accessor field #f))
228 ((_ field def) (identifier? #'field)
229 #`(define* (#,(datum->syntax
230 #'field
231 (symbol-append 'request- (syntax->datum #'field)))
232 request
233 #:optional (default def))
234 (cond
235 ((assq 'field (request-headers request)) => cdr)
236 (else default)))))))
237
238 ;; General headers
239 ;;
240 (define-request-accessor cache-control '())
241 (define-request-accessor connection '())
242 (define-request-accessor date #f)
243 (define-request-accessor pragma '())
244 (define-request-accessor trailer '())
245 (define-request-accessor transfer-encoding '())
246 (define-request-accessor upgrade '())
247 (define-request-accessor via '())
248 (define-request-accessor warning '())
249
250 ;; Entity headers
251 ;;
252 (define-request-accessor allow '())
253 (define-request-accessor content-encoding '())
254 (define-request-accessor content-language '())
255 (define-request-accessor content-length #f)
256 (define-request-accessor content-location #f)
257 (define-request-accessor content-md5 #f)
258 (define-request-accessor content-range #f)
259 (define-request-accessor content-type #f)
260 (define-request-accessor expires #f)
261 (define-request-accessor last-modified #f)
262
263 ;; Request headers
264 ;;
265 (define-request-accessor accept '())
266 (define-request-accessor accept-charset '())
267 (define-request-accessor accept-encoding '())
268 (define-request-accessor accept-language '())
269 (define-request-accessor authorization #f)
270 (define-request-accessor expect '())
271 (define-request-accessor from #f)
272 (define-request-accessor host #f)
273 ;; Absence of an if-directive appears to be different from `*'.
274 (define-request-accessor if-match #f)
275 (define-request-accessor if-modified-since #f)
276 (define-request-accessor if-none-match #f)
277 (define-request-accessor if-range #f)
278 (define-request-accessor if-unmodified-since #f)
279 (define-request-accessor max-forwards #f)
280 (define-request-accessor proxy-authorization #f)
281 (define-request-accessor range #f)
282 (define-request-accessor referer #f)
283 (define-request-accessor te '())
284 (define-request-accessor user-agent #f)
285
286 ;; Misc accessors
287 (define* (request-absolute-uri r #:optional default-host default-port)
288 (let ((uri (request-uri r)))
289 (if (uri-host uri)
290 uri
291 (let ((host
292 (or (request-host r)
293 (if default-host
294 (cons default-host default-port)
295 (bad-request
296 "URI not absolute, no Host header, and no default: ~s"
297 uri)))))
298 (build-uri (uri-scheme uri)
299 #:host (car host)
300 #:port (cdr host)
301 #:path (uri-path uri)
302 #:query (uri-query uri)
303 #:fragment (uri-fragment uri))))))