http-client: Avoid name clash with 'open-connection-for-uri' in 2.2.0.
[jackhill/guix/guix.git] / guix / build / download.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017 Ludovic Courtès <ludo@gnu.org>
3 ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org>
4 ;;; Copyright © 2015 Steve Sprang <scs@stevesprang.com>
5 ;;;
6 ;;; This file is part of GNU Guix.
7 ;;;
8 ;;; GNU Guix is free software; you can redistribute it and/or modify it
9 ;;; under the terms of the GNU General Public License as published by
10 ;;; the Free Software Foundation; either version 3 of the License, or (at
11 ;;; your option) any later version.
12 ;;;
13 ;;; GNU Guix is distributed in the hope that it will be useful, but
14 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;;; GNU General Public License for more details.
17 ;;;
18 ;;; You should have received a copy of the GNU General Public License
19 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20
21 (define-module (guix build download)
22 #:use-module (web uri)
23 #:use-module (web http)
24 #:use-module ((web client) #:hide (open-socket-for-uri))
25 #:use-module (web response)
26 #:use-module (guix base64)
27 #:use-module (guix ftp-client)
28 #:use-module (guix build utils)
29 #:use-module (rnrs io ports)
30 #:use-module (rnrs bytevectors)
31 #:use-module (srfi srfi-1)
32 #:use-module (srfi srfi-11)
33 #:use-module (srfi srfi-19)
34 #:use-module (srfi srfi-26)
35 #:autoload (ice-9 ftw) (scandir)
36 #:use-module (ice-9 match)
37 #:use-module (ice-9 format)
38 #:export (open-socket-for-uri
39 open-connection-for-uri
40 %x509-certificate-directory
41 close-connection
42 resolve-uri-reference
43 maybe-expand-mirrors
44 url-fetch
45 byte-count->string
46 current-terminal-columns
47 progress-proc
48 uri-abbreviation
49 nar-uri-abbreviation
50 store-path-abbreviation))
51
52 ;;; Commentary:
53 ;;;
54 ;;; Fetch data such as tarballs over HTTP or FTP (builder-side code).
55 ;;;
56 ;;; Code:
57
(define %http-receive-buffer-size
  ;; Number of bytes buffered when receiving download payloads: 64 KiB.
  (* 64 1024))
61
(define current-terminal-columns
  ;; Parameter holding the width of the terminal, in columns.  It defaults
  ;; to 80 and is read when laying out progress reports.
  (make-parameter 80))
65
(define (nearest-exact-integer x)
  "Round the real number X to the closest integer and return it as an exact
number.  Halfway cases go to the even neighbour."
  (let ((rounded (round x)))
    (inexact->exact rounded)))
70
(define (duration->seconds duration)
  "Convert DURATION, an SRFI-19 'time-duration' object, to an inexact number
of seconds."
  (let ((whole (time-second duration))
        (nanos (time-nanosecond duration)))
    (+ whole (/ nanos 1e9))))
76
(define (seconds->string duration)
  "Format DURATION, a number of seconds, as 'mm:ss', or as 'hh:mm:ss' when it
spans at least one hour.  Return \"00:00\" when DURATION is not a number."
  (cond ((number? duration)
         (let* ((total    (nearest-exact-integer duration))
                (hours    (quotient total 3600))
                (leftover (modulo total 3600))
                (minutes  (quotient leftover 60))
                (seconds  (modulo leftover 60)))
           ;; The '~@[...~]' directive prints the hour field only when its
           ;; argument is true, hence the #f for durations under one hour.
           (format #f "~@[~2,'0d:~]~2,'0d:~2,'0d"
                   (and (positive? hours) hours)
                   minutes seconds)))
        (else
         "00:00")))
89
(define (byte-count->string size)
  "Return a human-readable rendering of SIZE, a number of bytes, using binary
units (B, KiB, MiB, GiB, TiB)."
  (define (unit power)
    ;; Number of bytes in one unit of the given POWER of 1024 (inexact).
    (expt 1024. power))

  (define (scaled power)
    (/ size (unit power)))

  (cond ((< size (unit 1))
         (format #f "~dB" (nearest-exact-integer size)))
        ((< size (unit 2))
         (format #f "~dKiB" (nearest-exact-integer (scaled 1))))
        ((< size (unit 3))
         (format #f "~,1fMiB" (scaled 2)))
        ((< size (unit 4))
         (format #f "~,2fGiB" (scaled 3)))
        (else
         (format #f "~,3fTiB" (scaled 4)))))
103
(define* (progress-bar % #:optional (bar-width 20))
  "Return an ASCII-art progress bar for the percentage %, as a string.
BAR-WIDTH is the number of cells between the square brackets.  Percentages
outside the 0-100 range are clamped, so that a transfer delivering more bytes
than announced does not ask 'make-string' for a negative length."
  (let* ((fraction (max 0 (min 1 (/ % 100))))
         (filled   (inexact->exact (floor (* fraction bar-width))))
         (empty    (- bar-width filled)))
    (format #f "[~a~a]"
            (make-string filled #\#)
            (make-string empty #\space))))
113
(define (string-pad-middle left right len)
  "Return LEFT and RIGHT joined by as many spaces as needed for the result to
be LEN characters long, so that RIGHT ends flush with column LEN.  At least
one space is always inserted, so the result may be longer than LEN."
  (let* ((content-width (+ (string-length left) (string-length right)))
         (gap           (max 1 (- len content-width))))
    (string-append left (make-string gap #\space) right)))
124
(define* (ellipsis #:optional (port (current-output-port)))
  "Return Unicode's HORIZONTAL ELLIPSIS when PORT's encoding is UTF-8, and
three ASCII dots otherwise.  This is only a rough guess at what PORT can
display."
  (let ((encoding (port-encoding port)))
    (cond ((equal? encoding "UTF-8") "…")
          (else "..."))))
131
(define* (store-path-abbreviation store-path #:optional (prefix-length 6))
  "If STORE-PATH is the file name of a store entry, return an abbreviation of
STORE-PATH for display: the first PREFIX-LENGTH characters of its hash, an
ellipsis, and whatever follows the hash.  Otherwise return STORE-PATH
unchanged.  File names under the store whose base name is too short to carry
a hash (such as the store directory itself) are also returned unchanged
rather than triggering an out-of-range error."
  (define hash-length
    ;; Number of leading characters of a store item's base name that make up
    ;; its hash.
    32)

  (let ((base (basename store-path)))
    (if (and (string-prefix? (%store-directory) store-path)
             (>= (string-length base) (max hash-length prefix-length)))
        (string-append (string-take base prefix-length)
                       (ellipsis)
                       (string-drop base hash-length))
        store-path)))
142
(define* (progress-proc file size
                        #:optional (log-port (current-output-port))
                        #:key (abbreviation basename))
  "Return a procedure to show the progress of FILE's download, which is SIZE
bytes long.  The returned procedure is suitable for use as an argument to
`dump-port': it takes the number of bytes transferred so far and a
continuation thunk, prints a report, and calls the thunk.  The progress report
is written to LOG-PORT, with ABBREVIATION used to shorten FILE for display.

When SIZE is not a positive number (unknown or zero length), the report shows
the number of bytes transferred instead of a percentage."
  ;; XXX: Because of <http://bugs.gnu.org/19939> this procedure is often not
  ;; called as frequently as we'd like to; this is especially bad with Nginx
  ;; on hydra.gnu.org, which returns whole nars as a single chunk.
  (define start-time
    ;; Monotonic time of the first report, or #f before the first report.
    #f)

  (define (elapsed-seconds!)
    ;; Return the number of seconds elapsed since the first report, or #f if
    ;; this is the first report, in which case the start time is recorded.
    (let ((now (current-time time-monotonic)))
      (if start-time
          (duration->seconds (time-difference now start-time))
          (begin
            (set! start-time now)
            #f))))

  (define (throughput transferred elapsed)
    ;; Return the transfer rate in bytes per second; zero when no time has
    ;; elapsed yet, to avoid reporting an infinite or undefined rate.
    (if (and elapsed (positive? elapsed))
        (/ transferred elapsed)
        0))

  (define (report left right)
    ;; Erase the current line ("\r" followed by the ANSI "erase line" escape
    ;; sequence) and print LEFT and RIGHT on it.
    (display "\r\x1b[K" log-port)
    (display (string-pad-middle left right
                                (current-terminal-columns))
             log-port)
    (flush-output-port log-port))

  (if (and (number? size) (positive? size))
      (lambda (transferred cont)
        (let* ((elapsed (elapsed-seconds!))
               (%       (* 100.0 (/ transferred size)))
               (left    (format #f " ~a ~a"
                                (abbreviation file)
                                (byte-count->string size)))
               (right   (format #f "~a/s ~a ~a~6,1f%"
                                (byte-count->string
                                 (throughput transferred elapsed))
                                (seconds->string elapsed)
                                (progress-bar %) %)))
          (report left right)
          (cont)))
      (lambda (transferred cont)
        (let* ((elapsed (elapsed-seconds!))
               (left    (format #f " ~a"
                                (abbreviation file)))
               (right   (format #f "~a/s ~a | ~a transferred"
                                (byte-count->string
                                 (throughput transferred elapsed))
                                (seconds->string elapsed)
                                (byte-count->string transferred))))
          (report left right)
          (cont)))))
202
(define* (uri-abbreviation uri #:optional (max-length 42))
  "Return the string representation of URI, or, when that is longer than
MAX-LENGTH characters, an abbreviation showing the scheme, the host, and the
base name of the file.  The abbreviation is only used when it is actually
shorter than the full string."
  (let ((full (uri->string uri)))
    (define (shortened)
      ;; Replace the directory part of the path with an ellipsis; if that is
      ;; still too long, also elide the first half of the base name.
      (let* ((base   (basename (uri-path uri)))
             (prefix (string-append (symbol->string (uri-scheme uri))
                                    "://"
                                    ;; `file' URIs have no host part.
                                    (or (uri-host uri) "")
                                    (string-append "/" (ellipsis) "/")))
             (width  (+ (string-length prefix) (string-length base))))
        (if (> width max-length)
            (string-append prefix (ellipsis)
                           (string-drop base
                                        (quotient (string-length base) 2)))
            (string-append prefix base))))

    (cond ((<= (string-length full) max-length)
           full)
          (else
           (let ((candidate (shortened)))
             (if (< (string-length candidate) (string-length full))
                 candidate
                 full))))))
229
(define (nar-uri-abbreviation uri)
  "Abbreviate URI, a URI object or string assumed to designate a nar as
served by Hydra and 'guix publish', something like
\"http://example.org/nar/1ldrllwbna0aw5z8kpci4fsvbd2w8cw4-texlive-bin-2015\".
Return the base name of its path, stripped of the leading 32-character hash
and dash when it has one."
  (let* ((parsed (if (string? uri) (string->uri uri) uri))
         (name   (basename (uri-path parsed)))
         (hashed? (and (> (string-length name) 33)
                       (char=? (string-ref name 32) #\-))))
    (if hashed?
        (string-drop name 33)
        name)))
240
(define* (ftp-fetch uri file #:key timeout)
  "Download the file designated by URI over FTP and store it as FILE,
reporting progress on the current output port.  Return FILE on success.
TIMEOUT is passed to 'ftp-open' and bounds the time allowed for establishing
the connection, in seconds."
  (let* ((conn   (ftp-open (uri-host uri) #:timeout timeout))
         (remote (uri-path uri))
         ;; The size is only used for the progress report, so it is fine for
         ;; it to be #f when the server cannot provide it.
         (size   (false-if-exception (ftp-size conn remote)))
         (in     (ftp-retr conn (basename remote) (dirname remote))))
    (call-with-output-file file
      (lambda (out)
        (let ((report (progress-proc (uri-abbreviation uri) size)))
          (dump-port in out
                     #:buffer-size %http-receive-buffer-size
                     #:progress report))))

    (ftp-close conn))
  ;; Terminate the progress line.
  (newline)
  file)
257
;; GnuTLS is autoloaded so that this module remains usable when GnuTLS is
;; not available.  At compile time this yields "possibly unbound variable"
;; warnings, which are harmless: the variables will be bound whenever they
;; are needed, because (guix download) adds GnuTLS as an input in that case.

;; XXX: 'module-autoload!' is used instead of an #:autoload clause to avoid
;; compilation errors.  See <http://bugs.gnu.org/12202>.
(let ((this-module (current-module)))
  (module-autoload! this-module
                    '(gnutls) '(make-session connection-end/client)))
268
(define %tls-ports
  ;; Weak-key table associating each TLS session record port with the file
  ;; port it communicates through.
  (make-weak-key-hash-table))
272
(define (register-tls-record-port record-port port)
  "Record in '%tls-ports' that RECORD-PORT, a TLS session record port, uses
PORT as its underlying communication port.  RECORD-PORT is held weakly, so
the association does not outlive it."
  (let ((table %tls-ports))
    (hashq-set! table record-port port)))
277
(define %x509-certificate-directory
  ;; Parameter holding the name of the directory that contains X.509
  ;; authority certificates in PEM format, or #f.  'SSL_CERT_DIR' is
  ;; honoured as a fallback, like OpenSSL does.
  (make-parameter
   (cond ((getenv "GUIX_TLS_CERTIFICATE_DIRECTORY") => identity)
         (else (getenv "SSL_CERT_DIR")))))
282
(define (make-credendials-with-ca-trust-files directory)
  "Return certificate credentials that trust the X.509 authority certificates
found in the '.pem' files of DIRECTORY.  Those authority certificates are
checked when 'peer-certificate-status' is later called.  A DIRECTORY that
cannot be scanned yields credentials without any trusted authority."
  (let ((cred      (make-certificate-credentials))
        (pem-names (or (scandir directory
                                (cut string-suffix? ".pem" <>))
                       '())))
    (for-each (lambda (name)
                (let ((pem (string-append directory "/" name)))
                  ;; Dangling symlinks are listed by 'scandir' but cannot be
                  ;; read, so skip them.
                  (when (file-exists? pem)
                    (set-certificate-credentials-x509-trust-file!
                     cred pem
                     x509-certificate-format/pem))))
              pem-names)
    cred))
301
(define (peer-certificate session)
  "Return the X.509 certificate of the remote peer of SESSION, imported from
the first, DER-encoded entry of its certificate chain."
  (let ((chain (session-peer-certificate-chain session)))
    (match chain
      ((leaf _ ...)
       (import-x509-certificate leaf x509-certificate-format/der)))))
307
(define (assert-valid-server-certificate session server)
  "Return #t if the certificate of the remote peer for SESSION is a valid
certificate for SERVER, the expected host name of the peer.  Otherwise throw
to 'tls-certificate-error' with either 'host-mismatch or 'invalid-certificate
as the first argument."
  (let ((cert (peer-certificate session)))
    ;; The certificate must have been issued for SERVER...
    (unless (x509-certificate-matches-hostname? cert server)
      (throw 'tls-certificate-error 'host-mismatch cert server))

    ;; ... and it must be valid and reachable from the set of authority
    ;; certificates loaded via 'set-certificate-credentials-x509-trust-file!'.
    ;; An empty list of statuses means that nothing is wrong with it.
    (let ((statuses (peer-certificate-status session)))
      (if (null? statuses)
          #t
          (throw 'tls-certificate-error 'invalid-certificate cert server
                 statuses)))))
326
(define (print-tls-certificate-error port key args default-printer)
  "Write to PORT an intelligible description of the TLS certificate error
represented by ARGS, the arguments of a 'tls-certificate-error' exception.
KEY and DEFAULT-PRINTER are ignored."
  (match args
    (('host-mismatch cert server)
     (let ((subject (x509-certificate-dn cert)))
       (format port
               "X.509 server certificate for '~a' does not match: ~a~%"
               server subject)))
    (('invalid-certificate cert server statuses)
     (let ((reasons (map certificate-status->string statuses)))
       (format port
               "X.509 certificate of '~a' could not be verified:~%~{ ~a~%~}"
               server reasons)))))

;; Have 'print-exception' use the procedure above for TLS certificate errors.
(set-exception-printer! 'tls-certificate-error
                        print-tls-certificate-error)
343
(define* (tls-wrap port server #:key (verify-certificate? #t))
  "Perform a TLS handshake with SERVER over PORT and return the resulting
session record port.  SERVER must be a DNS host name without trailing dot.
When VERIFY-CERTIFICATE? is true, the server's certificate is checked; if the
check fails, PORT is closed and a 'tls-certificate-error' exception is
thrown."
  (define (log level str)
    ;; Logging procedure for GnuTLS; only used when debugging (see below).
    (format (current-error-port)
            "gnutls: [~a|~a] ~a" (getpid) level str))

  (define session
    (make-session connection-end/client))

  (define ca-certs
    (%x509-certificate-directory))

  (define (server-name-indication-supported?)
    ;; 'set-session-server-name!' is not available in older GnuTLS releases.
    (module-defined? (resolve-interface '(gnutls))
                     'set-session-server-name!))

  ;; Some servers such as 'cloud.github.com' require the client to support
  ;; the 'SERVER NAME' extension.  See <http://bugs.gnu.org/18526> for
  ;; details.
  (cond ((server-name-indication-supported?)
         (set-session-server-name! session server-name-type/dns server))
        (else
         (format (current-error-port)
                 "warning: TLS 'SERVER NAME' extension not supported~%")))

  (set-session-transport-fd! session (fileno port))
  (set-session-default-priority! session)

  ;; The "%COMPAT" bit allows us to work around firewall issues (info
  ;; "(gnutls) Priority Strings"); see <http://bugs.gnu.org/23311>.
  ;; SSLv3 is explicitly disabled because it is insecure:
  ;; <https://tools.ietf.org/html/rfc7568>.
  (set-session-priorities! session "NORMAL:%COMPAT:-VERS-SSL3.0")

  ;; Only load the authority certificates when they are going to be used.
  (let ((credentials (if (and verify-certificate? ca-certs)
                         (make-credendials-with-ca-trust-files ca-certs)
                         (make-certificate-credentials))))
    (set-session-credentials! session credentials))

  ;; Uncomment the following lines in case of debugging emergency.
  ;;(set-log-level! 10)
  ;;(set-log-procedure! log)

  (handshake session)

  ;; Verify the server's certificate if needed, closing PORT before
  ;; re-throwing when it turns out to be invalid.
  (when verify-certificate?
    (catch 'tls-certificate-error
      (lambda ()
        (assert-valid-server-certificate session server))
      (lambda args
        (close-port port)
        (apply throw args))))

  ;; Since we use `fileno' above, the file descriptor behind PORT would be
  ;; closed when PORT is GC'd.  If we used `port->fdes', it would instead
  ;; never be closed.  So we use `fileno', but keep a weak reference to
  ;; PORT, so the file descriptor gets closed when the record port is GC'd.
  (let ((record (session-record-port session)))
    (register-tls-record-port record port)
    record))
401
(define (ensure-uri uri-or-string)                ;XXX: copied from (web http)
  "Return URI-OR-STRING as a URI object, parsing it when it is a string.
Raise an error when it is neither a string nor a URI."
  (if (string? uri-or-string)
      (string->uri uri-or-string)
      (if (uri? uri-or-string)
          uri-or-string
          (error "Invalid URI" uri-or-string))))
407
(define current-http-proxy
  ;; XXX: Guile < 2.0.10 lacks 'current-http-proxy', which is used in
  ;; 'open-socket-for-uri'.  Use the binding of (web client) when there is
  ;; one, and fall back to a dummy procedure that always returns #f.
  (let* ((variable (module-variable (resolve-interface '(web client))
                                    'current-http-proxy))
         (value    (and variable (variable-ref variable))))
    (or value (const #f))))
415
(define* (open-socket-for-uri uri-or-string #:key timeout)
  "Return an open input/output port for a connection to URI-OR-STRING, or to
the HTTP proxy when one is configured.  When TIMEOUT is not #f, it must be a
(possibly inexact) number denoting the maximum duration in seconds to wait
for the connection to complete; past TIMEOUT, an ETIMEDOUT error is raised."
  ;; Includes a fix for <http://bugs.gnu.org/15368> which affects Guile's
  ;; 'open-socket-for-uri' up to 2.0.11 included, uses 'connect*' instead
  ;; of 'connect', and uses AI_ADDRCONFIG.
  (let* ((http-proxy (current-http-proxy))
         (uri        (ensure-uri (or http-proxy uri-or-string)))
         (port       (uri-port uri))
         ;; Without an explicit port, let 'getaddrinfo' look up the service
         ;; named after the scheme.
         (service    (if port
                         (number->string port)
                         (symbol->string (uri-scheme uri))))
         (flags      (if (number? port)
                         (logior AI_ADDRCONFIG AI_NUMERICSERV)
                         AI_ADDRCONFIG))
         (same-address? (lambda (ai1 ai2)
                          (equal? (addrinfo:addr ai1) (addrinfo:addr ai2))))
         (candidates (delete-duplicates
                      (getaddrinfo (uri-host uri) service flags)
                      same-address?)))
    (let try ((candidates candidates))
      (let* ((ai   (car candidates))
             (sock (with-fluids ((%default-port-encoding #f))
                     ;; Restrict ourselves to TCP.
                     (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP))))
        (catch 'system-error
          (lambda ()
            (connect* sock (addrinfo:addr ai) timeout)

            ;; Buffer input and output on this port.
            (setvbuf sock _IOFBF)
            ;; If we're using a proxy, make a note of that.
            (when http-proxy (set-http-proxy-port?! sock #t))
            sock)
          (lambda args
            ;; Connection failed, so try one of the other addresses, and
            ;; re-throw the error once they are exhausted.
            (close sock)
            (let ((others (cdr candidates)))
              (if (pair? others)
                  (try others)
                  (apply throw args)))))))))
459
(define* (open-connection-for-uri uri
                                  #:key
                                  timeout
                                  (verify-certificate? #t))
  "Like 'open-socket-for-uri', but also handle HTTPS connections.  The
resulting port must be closed with 'close-connection'.  TIMEOUT is passed to
'open-socket-for-uri'.  When VERIFY-CERTIFICATE? is true, verify HTTPS server
certificates."
  ;; Note: Guile 2.2.0's (web client) has a same-named export that's actually
  ;; undefined.  See Guile commit 011669af3b428e5626f7bbf66b11d57d9768c047.

  (define https?
    (eq? 'https (uri-scheme uri)))

  (define (connect)
    ;; Open the socket and, for HTTPS, wrap it in a TLS session.
    (let ((s (open-socket-for-uri uri #:timeout timeout)))
      ;; Buffer input and output on this port.
      (setvbuf s _IOFBF %http-receive-buffer-size)

      (if https?
          (tls-wrap s (uri-host uri)
                    #:verify-certificate? verify-certificate?)
          s)))

  ;; For HTTPS URIs, honor 'https_proxy', not 'http_proxy'.
  ;; FIXME: Proxying is not supported for https.
  (if (and https?
           (module-variable (resolve-interface '(web client))
                            'current-http-proxy))
      (parameterize ((current-http-proxy #f))
        (when (and=> (getenv "https_proxy")
                     (negate string-null?))
          (format (current-error-port)
                  "warning: 'https_proxy' is ignored~%"))
        (connect))
      (connect)))
499
(define (close-connection port)
  "Like 'close-port', but (1) idempotent, and (2) also closes the underlying
port if PORT is a TLS session record port registered in '%tls-ports'."
  ;; FIXME: This is a partial workaround for <http://bugs.gnu.org/20145>,
  ;; because 'http-fetch' & co. may return a chunked input port whose 'close'
  ;; method calls 'close-port', not 'close-connection'.
  (if (port-closed? port)
      #f
      (close-port port))
  (let ((underlying (hashq-ref %tls-ports port)))
    (and underlying
         (close-connection underlying))))
510
;; XXX: Awful hack: replace 'set-port-encoding!' in (web response) with a
;; no-op so that the (set-port-encoding! p "ISO-8859-1") call in
;; `read-response' passes, even during bootstrap where iconv is not
;; available.
(let ((web-response (resolve-module '(web response))))
  (module-define! web-response
                  'set-port-encoding!
                  (lambda (port encoding) #f)))
517
;; XXX: Work around <http://bugs.gnu.org/23421>, fixed in Guile commit
;; 16050431f29d56f80c4a8253506fc851b8441840.  Guile's date validation
;; procedure rejects dates in which the hour is not padded with a zero but
;; with whitespace.
(begin
  ;; (string-match? STR PAT) expands to a test that STR has the same length
  ;; as the literal string PAT and that each of its characters agrees with
  ;; the corresponding character of PAT: '.' accepts anything, 'd' a digit,
  ;; 'a' a letter, and any other character stands for itself.
  (define-syntax string-match?
    (lambda (x)
      (syntax-case x ()
        ((_ str pat) (string? (syntax->datum #'pat))
         (let ((p (syntax->datum #'pat)))
           #`(let ((s str))
               (and
                (= (string-length s) #,(string-length p))
                #,@(let lp ((i 0) (tests '()))
                     (if (< i (string-length p))
                         (let ((c (string-ref p i)))
                           (lp (1+ i)
                               (case c
                                 ((#\.)   ; Whatever.
                                  tests)
                                 ((#\d)   ; Digit.
                                  (cons #`(char-numeric? (string-ref s #,i))
                                        tests))
                                 ((#\a)   ; Alphabetic.
                                  (cons #`(char-alphabetic? (string-ref s #,i))
                                        tests))
                                 (else    ; Literal.
                                  (cons #`(eqv? (string-ref s #,i) #,c)
                                        tests)))))
                         tests)))))))))

  ;; Replacement for the procedure of the same name in (web http).  STR is
  ;; the date string, SPACE the index at which its date-and-time part ends,
  ;; and ZONE-OFFSET the time zone offset passed on to 'make-date'.  Return
  ;; an SRFI-19 date; unrecognized dates are reported with 'bad-header'.
  (define (parse-rfc-822-date str space zone-offset)
    (let ((parse-non-negative-integer (@@ (web http) parse-non-negative-integer))
          (parse-month (@@ (web http) parse-month))
          (bad-header (@@ (web http) bad-header)))
      ;; We could verify the day of the week but we don't.
      (cond ((string-match? (substring str 0 space) "aaa, dd aaa dddd dd:dd:dd")
             (let ((date    (parse-non-negative-integer str 5 7))
                   (month   (parse-month str 8 11))
                   (year    (parse-non-negative-integer str 12 16))
                   (hour    (parse-non-negative-integer str 17 19))
                   (minute  (parse-non-negative-integer str 20 22))
                   (second  (parse-non-negative-integer str 23 25)))
               (make-date 0 second minute hour date month year zone-offset)))
            ((string-match? (substring str 0 space) "aaa, d aaa dddd dd:dd:dd")
             (let ((date    (parse-non-negative-integer str 5 6))
                   (month   (parse-month str 7 10))
                   (year    (parse-non-negative-integer str 11 15))
                   (hour    (parse-non-negative-integer str 16 18))
                   (minute  (parse-non-negative-integer str 19 21))
                   (second  (parse-non-negative-integer str 22 24)))
               (make-date 0 second minute hour date month year zone-offset)))

            ;; The next two clauses match dates that have a space instead of
            ;; a leading zero for hours, like " 8:49:37".  Mind the TWO
            ;; spaces before the hour digit in the patterns: one separates
            ;; the year from the time, the other is the padding.  The hour
            ;; is thus the single digit right after them.
            ((string-match? (substring str 0 space) "aaa, dd aaa dddd  d:dd:dd")
             (let ((date    (parse-non-negative-integer str 5 7))
                   (month   (parse-month str 8 11))
                   (year    (parse-non-negative-integer str 12 16))
                   (hour    (parse-non-negative-integer str 18 19))
                   (minute  (parse-non-negative-integer str 20 22))
                   (second  (parse-non-negative-integer str 23 25)))
               (make-date 0 second minute hour date month year zone-offset)))
            ((string-match? (substring str 0 space) "aaa, d aaa dddd  d:dd:dd")
             (let ((date    (parse-non-negative-integer str 5 6))
                   (month   (parse-month str 7 10))
                   (year    (parse-non-negative-integer str 11 15))
                   (hour    (parse-non-negative-integer str 17 18))
                   (minute  (parse-non-negative-integer str 19 21))
                   (second  (parse-non-negative-integer str 22 24)))
               (make-date 0 second minute hour date month year zone-offset)))

            (else
             (bad-header 'date str)         ; prevent tail call
             #f))))

  ;; Install the replacement in (web http).
  (module-set! (resolve-module '(web http))
               'parse-rfc-822-date parse-rfc-822-date))
595
;; XXX: Work around <http://bugs.gnu.org/19840>, present in Guile
;; up to 2.0.11: on those versions, declare "Location" as a header whose
;; value may be a relative URI.
(when (and (<= (string->number (major-version)) 2)
           (<= (string->number (minor-version)) 0)
           (<= (string->number (micro-version)) 11))
  (let ((variable (module-variable (resolve-module '(web http))
                                   'declare-relative-uri-header!)))
    ;; If 'declare-relative-uri-header!' doesn't exist, forget it.
    (when (and variable (variable-bound? variable))
      ((variable-ref variable) "Location"))))
607
(define (resolve-uri-reference ref base)
  "Resolve the URI reference REF, interpreted relative to the BASE URI, into a
target URI, according to the algorithm specified in RFC 3986 section 5.2.2.
Return the resulting target URI."

  (define (merge-paths base-path rel-path)
    ;; Return REL-PATH appended to BASE-PATH minus its last component.
    (let* ((parts           (string-split base-path #\/))
           (directory-parts (if (null? parts)
                                '()
                                (drop-right parts 1))))
      (string-append (string-join directory-parts "/") "/" rel-path)))

  (define (dot-segment? segment)
    (or (equal? segment ".") (equal? segment "..")))

  (define (remove-dot-segments path)
    ;; Return PATH with its "." components removed and with each ".."
    ;; component cancelling the component that precedes it.
    (let loop ((pending
                ;; Drop leading "." and ".." components from a relative
                ;; path (absolute paths will start with a "" component).
                (drop-while dot-segment? (string-split path #\/)))
               (kept '()))
      (cond ((null? pending)
             (string-join (reverse kept) "/"))
            ((equal? (car pending) ".")
             (loop (cdr pending) kept))
            ((equal? (car pending) "..")
             ;; Refuse to climb above the root or above an empty path.
             (if (or (null? kept) (equal? kept '("")))
                 (error "remove-dot-segments: too many '..' components" path)
                 (loop (cdr pending) (cdr kept))))
            (else
             (loop (cdr pending) (cons (car pending) kept))))))

  (define (on-base-authority path query)
    ;; Build a URI that takes its scheme and authority from BASE, uses PATH
    ;; and QUERY, and takes its fragment from REF.
    (build-uri (uri-scheme base)
               #:userinfo (uri-userinfo base)
               #:host     (uri-host base)
               #:port     (uri-port base)
               #:path     path
               #:query    query
               #:fragment (uri-fragment ref)))

  (cond ((or (uri-scheme ref)
             (uri-host ref))
         ;; REF carries its own authority; at most the scheme comes from BASE.
         (build-uri (or (uri-scheme ref)
                        (uri-scheme base))
                    #:userinfo (uri-userinfo ref)
                    #:host     (uri-host ref)
                    #:port     (uri-port ref)
                    #:path     (remove-dot-segments (uri-path ref))
                    #:query    (uri-query ref)
                    #:fragment (uri-fragment ref)))
        ((string-null? (uri-path ref))
         ;; REF has no path: keep that of BASE, and its query too unless REF
         ;; provides one.
         (on-base-authority (remove-dot-segments (uri-path base))
                            (or (uri-query ref)
                                (uri-query base))))
        (else
         (let ((path (uri-path ref)))
           (on-base-authority
            (remove-dot-segments (if (string-prefix? "/" path)
                                     path
                                     (merge-paths (uri-path base) path)))
            (uri-query ref))))))
675
(define* (http-fetch uri file #:key timeout (verify-certificate? #t))
  "Fetch data from URI and write it to FILE; when TIMEOUT is true, bail out if
the connection could not be established in less than TIMEOUT seconds.  Return
FILE on success.  When VERIFY-CERTIFICATE? is true, verify HTTPS
certificates; otherwise simply ignore them.

Redirections (HTTP 301, 302, 303, 307, and 308) are followed.  Any other
response code different from 200 raises an error.  In all cases the
connection is closed with 'close-connection' once we are done with it."

  (define headers
    `(;; Some web sites, such as http://dist.schmorp.de, would block you if
      ;; there's no 'User-Agent' header, presumably on the assumption that
      ;; you're a spammer.  So work around that.
      (User-Agent . "GNU Guile")

      ;; Some servers, such as https://alioth.debian.org, return "406 Not
      ;; Acceptable" when not explicitly told that everything is accepted.
      (Accept . "*/*")

      ;; Basic authentication, if needed.
      ,@(match (uri-userinfo uri)
          ((? string? str)
           `((Authorization . ,(string-append "Basic "
                                              (base64-encode
                                               (string->utf8 str))))))
          (_ '()))))

  (let*-values (((connection)
                 (open-connection-for-uri uri
                                          #:timeout timeout
                                          #:verify-certificate?
                                          verify-certificate?))
                ((resp bv-or-port)
                 (http-get uri #:port connection #:decode-body? #f
                           #:streaming? #t
                           #:headers headers))
                ((code)
                 (response-code resp))
                ((size)
                 (response-content-length resp)))
    (case code
      ((200)                                      ; OK
       (call-with-output-file file
         (lambda (p)
           (if (port? bv-or-port)
               (begin
                 (dump-port bv-or-port p
                            #:buffer-size %http-receive-buffer-size
                            #:progress (progress-proc (uri-abbreviation uri)
                                                      size))
                 ;; Terminate the progress line.
                 (newline))
               (put-bytevector p bv-or-port))))
       ;; The whole body has been consumed; release the connection,
       ;; including the socket underneath a TLS session record port.
       (close-connection connection)
       file)
      ((301                                       ; moved permanently
        302                                       ; found (redirection)
        303                                       ; see other
        307                                       ; temporary redirection
        308)                                      ; permanent redirection
       (let ((uri (resolve-uri-reference (response-location resp) uri)))
         (format #t "following redirection to `~a'...~%"
                 (uri->string uri))
         ;; Use 'close-connection' rather than 'close' so that the socket
         ;; underneath a TLS session record port is closed as well.
         (close-connection connection)
         (http-fetch uri file
                     #:timeout timeout
                     #:verify-certificate? verify-certificate?)))
      (else
       (close-connection connection)
       (error "download failed" (uri->string uri)
              code (response-reason-phrase resp))))))
740
741 \f
(define-syntax-rule (false-if-exception* body ...)
  "Like `false-if-exception', but print the exception on the error port."
  (catch #t
    (lambda ()
      body ...)
    ;; Handler: swallow the exception and return #f.
    (const #f)
    ;; Pre-unwind handler: report the exception first.
    (lambda (tag . arguments)
      (print-exception (current-error-port) #f tag arguments))))
751
(define (uri-vicinity dir file)
  "Return DIR and FILE joined by exactly one slash: trailing slashes of DIR
and leading slashes of FILE are dropped first.  This is required by some HTTP
servers."
  (let ((directory (string-trim-right dir #\/))
        (relative  (string-trim file #\/)))
    (string-append directory "/" relative)))
757
(define (maybe-expand-mirrors uri mirrors)
  "If URI uses the 'mirror' scheme, expand it according to the MIRRORS alist,
which maps mirror kinds (symbols) to non-empty lists of base URLs, and return
the resulting list of URIs.  Raise an error when the mirror kind is unknown.
For any other scheme, return a list containing just URI."
  (if (eq? (uri-scheme uri) 'mirror)
      (let* ((kind       (string->symbol (uri-host uri)))
             (path       (uri-path uri))
             (candidates (assoc-ref mirrors kind)))
        (if (and (pair? candidates) (list? candidates))
            (map (lambda (mirror)
                   (string->uri (uri-vicinity mirror path)))
                 candidates)
            (error "unsupported URL mirror kind" kind uri)))
      (list uri)))
773
(define* (url-fetch url file
                    #:key
                    (timeout 10) (verify-certificate? #t)
                    (mirrors '()) (content-addressed-mirrors '())
                    (hashes '()))
  "Fetch FILE from URL; URL may be either a single string, or a list of
strings denoting alternate URLs for FILE.  Return #f on failure, and FILE
on success.

TIMEOUT is the maximum number of seconds allowed for establishing each
connection.

When MIRRORS is defined, it must be an alist of mirrors; it is used to resolve
'mirror://' URIs.

HASHES must be a list of algorithm/hash pairs, where each algorithm is a
symbol such as 'sha256 and each hash is a bytevector.
CONTENT-ADDRESSED-MIRRORS must be a list of procedures that, given a file
name, a hash algorithm, and a hash, return a URL where the specified data can
be retrieved or #f.  The URLs they return are tried after those of URL.

When VERIFY-CERTIFICATE? is true, validate HTTPS server certificates;
otherwise simply ignore them."
  (define uri
    ;; The list of URIs denoted by URL, with 'mirror://' URIs expanded.
    (append-map (cut maybe-expand-mirrors <> mirrors)
                (match url
                  ((_ ...) (map string->uri url))
                  (_       (list (string->uri url))))))

  (define (fetch uri file)
    ;; Download URI to FILE.  Return FILE on success and #f on failure,
    ;; after printing the error if any.
    (format #t "~%Starting download of ~a~%From ~a...~%"
            file (uri->string uri))
    (case (uri-scheme uri)
      ((http https)
       (false-if-exception* (http-fetch uri file
                                        #:verify-certificate?
                                        verify-certificate?
                                        #:timeout timeout)))
      ((ftp)
       (false-if-exception* (ftp-fetch uri file
                                       #:timeout timeout)))
      (else
       (format #t "skipping URI with unsupported scheme: ~s~%"
               uri)
       #f)))

  (define content-addressed-uris
    ;; One URI for each mirror and each hash for which the mirror provides a
    ;; URL.
    (append-map (lambda (make-url)
                  (filter-map (match-lambda
                                ((hash-algo . hash)
                                 (let ((file (strip-store-file-name file)))
                                   ;; MAKE-URL may return #f, which
                                   ;; 'string->uri' would choke on; let
                                   ;; 'filter-map' discard it instead.
                                   (and=> (make-url file hash-algo hash)
                                          string->uri))))
                              hashes))
                content-addressed-mirrors))

  ;; Make this unbuffered so 'progress-proc' works as expected.  _IOLBF means
  ;; '\n', not '\r', so it's not appropriate here.
  (setvbuf (current-output-port) _IONBF)

  (setvbuf (current-error-port) _IOLBF)

  ;; Try each URI in turn until one of them can be fetched.
  (let try ((uri (append uri content-addressed-uris)))
    (match uri
      ((uri tail ...)
       (or (fetch uri file)
           (try tail)))
      (()
       (format (current-error-port) "failed to download ~s from ~s~%"
               file url)
       #f))))
841
842 ;;; Local Variables:
843 ;;; eval: (put 'with-elapsed-time 'scheme-indent-function 1)
844 ;;; End:
845
846 ;;; download.scm ends here