From ad05d4e8c6ccd17a826af3a4df38f055eb3fc9b9 Mon Sep 17 00:00:00 2001 From: Andy Wingo Date: Fri, 22 Oct 2010 00:06:32 +0200 Subject: [PATCH] add HTTP request module * module/web/request.scm: Add HTTP request module. * test-suite/tests/web-request.test: Test cases. * module/Makefile.am: * test-suite/Makefile.am: Adapt. --- module/Makefile.am | 1 + module/web/request.scm | 294 ++++++++++++++++++++++++++++++ test-suite/Makefile.am | 1 + test-suite/tests/web-request.test | 84 +++++++++ 4 files changed, 380 insertions(+) create mode 100644 module/web/request.scm create mode 100644 test-suite/tests/web-request.test diff --git a/module/Makefile.am b/module/Makefile.am index 13e9b3419..598726020 100644 --- a/module/Makefile.am +++ b/module/Makefile.am @@ -350,6 +350,7 @@ LIB_SOURCES = \ WEB_SOURCES = \ web/http.scm \ + web/request.scm \ web/uri.scm EXTRA_DIST += oop/ChangeLog-2008 diff --git a/module/web/request.scm b/module/web/request.scm new file mode 100644 index 000000000..8e29589b3 --- /dev/null +++ b/module/web/request.scm @@ -0,0 +1,294 @@ +;;; HTTP request objects + +;; Copyright (C) 2010 Free Software Foundation, Inc. + +;; This library is free software; you can redistribute it and/or +;; modify it under the terms of the GNU Lesser General Public +;; License as published by the Free Software Foundation; either +;; version 3 of the License, or (at your option) any later version. +;; +;; This library is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;; Lesser General Public License for more details. +;; +;; You should have received a copy of the GNU Lesser General Public +;; License along with this library; if not, write to the Free Software +;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA +;; 02110-1301 USA + +;;; Code: + +(define-module (web request) + #:use-module (rnrs bytevectors) + #:use-module (rnrs io ports) + #:use-module (ice-9 rdelim) + #:use-module (srfi srfi-9) + #:use-module (web uri) + #:use-module (web http) + #:export (request? + request-method + request-uri + request-version + request-headers + request-port + + read-request + build-request + write-request + + read-request-body/latin-1 + write-request-body/latin-1 + + read-request-body/bytevector + write-request-body/bytevector + + ;; General headers + ;; + request-cache-control + request-connection + request-date + request-pragma + request-trailer + request-transfer-encoding + request-upgrade + request-via + request-warning + + ;; Entity headers + ;; + request-allow + request-content-encoding + request-content-language + request-content-length + request-content-location + request-content-md5 + request-content-range + request-content-type + request-expires + request-last-modified + + ;; Request headers + ;; + request-accept + request-accept-charset + request-accept-encoding + request-accept-language + request-authorization + request-expect + request-from + request-host + request-if-match + request-if-modified-since + request-if-none-match + request-if-range + request-if-unmodified-since + request-max-forwards + request-proxy-authorization + request-range + request-referer + request-te + request-user-agent + + ;; Misc + request-absolute-uri)) + + +;;; {Character Encodings, Strings, and Bytevectors} +;;; +;;; Requests are read from over the wire, and as such have to be treated +;;; very carefully. +;;; +;;; The header portion of the message is defined to be in a subset of +;;; ASCII, and may be processed either byte-wise (using bytevectors and +;;; binary I/O) or as characters in a single-byte ASCII-compatible +;;; encoding. +;;; +;;; We choose the latter, processing as strings in the latin-1 +;;; encoding. This allows us to use all the read-delimited machinery, +;;; character sets, and regular expressions, shared substrings, etc. +;;; +;;; The characters in the header values may themselves encode other +;;; bytes or characters -- basically each header has its own parser. We +;;; leave that as a header-specific topic. +;;; +;;; The body is present if the content-length header is present. Its +;;; format and, if textual, encoding is determined by the headers, but +;;; its length is encoded in bytes. So we just slurp that number of +;;; characters in latin-1, knowing that the number of characters +;;; corresponds to the number of bytes, and then convert to a +;;; bytevector, perhaps for later decoding. +;;; + +(define-record-type + (make-request method uri version headers port) + request? + (method request-method) + (uri request-uri) + (version request-version) + (headers request-headers) + (port request-port)) + +(define (bad-request message . args) + (throw 'bad-request message args)) + +(define (non-negative-integer? n) + (and (number? n) (>= n 0) (exact? n) (integer? n))) + +(define (validate-headers headers) + (if (pair? headers) + (let ((h (car headers))) + (if (pair? h) + (let ((k (car h)) (v (cdr h))) + (if (symbol? k) + (if (not (valid-header? k v)) + (bad-request "Bad value for header ~a: ~s" k v)) + (if (not (and (string? k) (string? v))) + (bad-request "Unknown header not a pair of strings: ~s" + h))) + (validate-headers (cdr headers))) + (bad-request "Header not a pair: ~a" h))) + (if (not (null? headers)) + (bad-request "Headers not a list: ~a" headers)))) + +(define* (build-request #:key (method 'GET) uri (version '(1 . 1)) + (headers '()) port (validate-headers? #t)) + (cond + ((not (and (pair? version) + (non-negative-integer? (car version)) + (non-negative-integer? (cdr version)))) + (bad-request "Bad version: ~a" version)) + ((not (uri? uri)) + (bad-request "Bad uri: ~a" uri)) + ((and (not port) (memq method '(POST PUT))) + (bad-request "Missing port for message ~a" method)) + (else + (if validate-headers? + (validate-headers headers)))) + (make-request method uri version headers port)) + +(define (read-request port) + (set-port-encoding! port "ISO-8859-1") + (call-with-values (lambda () (read-request-line port)) + (lambda (method uri version) + (make-request method uri version (read-headers port) port)))) + +(define (write-request r port) + (write-request-line (request-method r) (request-uri r) + (request-version r) port) + (write-headers (request-headers r) port) + (display "\r\n" port) + (if (eq? port (request-port r)) + r + (make-request (request-method r) (request-uri r) (request-version r) + (request-headers r) port))) + +;; Probably not what you want to use "in production". Relies on one byte +;; per char because we are in latin-1 encoding. +;; +(define (read-request-body/latin-1 r) + (let ((nbytes (request-content-length r))) + (and nbytes + (let ((buf (make-string nbytes))) + (read-delimited! "" buf (request-port r)) + buf)))) + +;; Likewise, assumes that body can be written in the latin-1 encoding, +;; and that the latin-1 encoding is what is expected by the server. +;; +(define (write-request-body/latin-1 r body) + (display body (request-port r))) + +(define (read-request-body/bytevector r) + (let ((nbytes (request-content-length r))) + (and nbytes + (let ((bv (get-bytevector-n (request-port r) nbytes))) + (if (= (bytevector-length bv) nbytes) + bv + (bad-request "EOF while reading request body: ~a bytes of ~a" + (bytevector-length bv) nbytes)))))) + +(define (write-request-body/bytevector r bv) + (put-bytevector (request-port r) bv)) + +(define-syntax define-request-accessor + (lambda (x) + (syntax-case x () + ((_ field) + #'(define-request-accessor field #f)) + ((_ field def) (identifier? #'field) + #`(define* (#,(datum->syntax + #'field + (symbol-append 'request- (syntax->datum #'field))) + request + #:optional (default def)) + (cond + ((assq 'field (request-headers request)) => cdr) + (else default))))))) + +;; General headers +;; +(define-request-accessor cache-control '()) +(define-request-accessor connection '()) +(define-request-accessor date #f) +(define-request-accessor pragma '()) +(define-request-accessor trailer '()) +(define-request-accessor transfer-encoding '()) +(define-request-accessor upgrade '()) +(define-request-accessor via '()) +(define-request-accessor warning '()) + +;; Entity headers +;; +(define-request-accessor allow '()) +(define-request-accessor content-encoding '()) +(define-request-accessor content-language '()) +(define-request-accessor content-length #f) +(define-request-accessor content-location #f) +(define-request-accessor content-md5 #f) +(define-request-accessor content-range #f) +(define-request-accessor content-type #f) +(define-request-accessor expires #f) +(define-request-accessor last-modified #f) + +;; Request headers +;; +(define-request-accessor accept '()) +(define-request-accessor accept-charset '()) +(define-request-accessor accept-encoding '()) +(define-request-accessor accept-language '()) +(define-request-accessor authorization #f) +(define-request-accessor expect '()) +(define-request-accessor from #f) +(define-request-accessor host #f) +;; Absence of an if-directive appears to be different from `*'. +(define-request-accessor if-match #f) +(define-request-accessor if-modified-since #f) +(define-request-accessor if-none-match #f) +(define-request-accessor if-range #f) +(define-request-accessor if-unmodified-since #f) +(define-request-accessor max-forwards #f) +(define-request-accessor proxy-authorization #f) +(define-request-accessor range #f) +(define-request-accessor referer #f) +(define-request-accessor te '()) +(define-request-accessor user-agent #f) + +;; Misc accessors +(define* (request-absolute-uri r #:optional default-host default-port) + (let ((uri (request-uri r))) + (if (uri-host uri) + uri + (let ((host + (or (request-host r) + (if default-host + (cons default-host default-port) + (bad-request + "URI not absolute, no Host header, and no default: ~s" + uri))))) + (build-uri (uri-scheme uri) + #:host (car host) + #:port (cdr host) + #:path (uri-path uri) + #:query (uri-query uri) + #:fragment (uri-fragment uri)))))) diff --git a/test-suite/Makefile.am b/test-suite/Makefile.am index 0f352ce38..c2a0b4fb8 100644 --- a/test-suite/Makefile.am +++ b/test-suite/Makefile.am @@ -151,6 +151,7 @@ SCM_TESTS = tests/00-initial-env.test \ tests/vlist.test \ tests/weaks.test \ tests/web-http.test \ + tests/web-request.test \ tests/web-uri.test EXTRA_DIST = \ diff --git a/test-suite/tests/web-request.test b/test-suite/tests/web-request.test new file mode 100644 index 000000000..82759bd6b --- /dev/null +++ b/test-suite/tests/web-request.test @@ -0,0 +1,84 @@ +;;;; web-request.test --- HTTP requests -*- mode: scheme; coding: utf-8; -*- +;;;; +;;;; Copyright (C) 2010 Free Software Foundation, Inc. +;;;; +;;;; This library is free software; you can redistribute it and/or +;;;; modify it under the terms of the GNU Lesser General Public +;;;; License as published by the Free Software Foundation; either +;;;; version 3 of the License, or (at your option) any later version. +;;;; +;;;; This library is distributed in the hope that it will be useful, +;;;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +;;;; Lesser General Public License for more details. +;;;; +;;;; You should have received a copy of the GNU Lesser General Public +;;;; License along with this library; if not, write to the Free Software +;;;; Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + + +(define-module (test-suite web-request) + #:use-module (web uri) + #:use-module (web request) + #:use-module (test-suite lib)) + + +;; The newlines are equivalent to \n. +(define example-1 + "GET /qux HTTP/1.1\r +Host: localhost:8080\r +User-Agent: Mozilla/5.0 (X11; U; Linux x86_64; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Safari/531.2+ Epiphany/2.30.2\r +Accept: application/xml,application/xhtml+xml,text/html;q=0.9,text/plain;q=0.8,image/png,*/*;q=0.5\r +Accept-Encoding: gzip\r +Accept-Language: en-gb, en;q=0.9\r +\r +") + +(define (requests-equal? r1 r2) + (and (equal? (request-method r1) (request-method r2)) + (equal? (request-uri r1) (request-uri r2)) + (equal? (request-version r1) (request-version r2)) + (equal? (request-headers r1) (request-headers r2)))) + +(with-test-prefix "example-1" + (let ((r #f)) + (pass-if "read-request" + (begin + (set! r (read-request (open-input-string example-1))) + (request? r))) + + (pass-if (equal? (request-method r) 'GET)) + + (pass-if (equal? (request-uri r) (build-uri 'http #:path "/qux"))) + + (pass-if (equal? (read-request-body/latin-1 r) #f)) + ;; Since it's #f, should be an idempotent read, so we can try + ;; bytevectors too + (pass-if (equal? (read-request-body/bytevector r) #f)) + + (pass-if "checking all headers" + (equal? + (request-headers r) + '((host . ("localhost" . 8080)) + (user-agent . "Mozilla/5.0 (X11; U; Linux x86_64; en-us) AppleWebKit/531.2+ (KHTML, like Gecko) Safari/531.2+ Epiphany/2.30.2") + (accept . (("application/xml") + ("application/xhtml+xml") + ("text/html" (q . 900)) + ("text/plain" (q . 800)) + ("image/png") + ("*/*" (q . 500)))) + (accept-encoding . ((1000 . "gzip"))) + (accept-language . ((1000 . "en-gb") (900 . "en")))))) + + ;; works because there is no body + (pass-if "write then read" + (requests-equal? (with-input-from-string + (with-output-to-string + (lambda () + (write-request r (current-output-port)))) + (lambda () + (read-request (current-input-port)))) + r)) + + (pass-if "by accessor" + (equal? (request-accept-encoding r) '((1000 . "gzip")))))) -- 2.20.1