Commit | Line | Data |
---|---|---|
4155e2a9 | 1 | ;;; GNU Guix --- Functional package management for GNU |
279d932b | 2 | ;;; Copyright © 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org> |
04dec194 | 3 | ;;; Copyright © 2015 Mark H Weaver <mhw@netris.org> |
57d28987 | 4 | ;;; Copyright © 2017 Tobias Geerinckx-Rice <me@tobias.gr> |
62cab99c | 5 | ;;; |
4155e2a9 | 6 | ;;; This file is part of GNU Guix. |
62cab99c | 7 | ;;; |
4155e2a9 | 8 | ;;; GNU Guix is free software; you can redistribute it and/or modify it |
62cab99c LC |
9 | ;;; under the terms of the GNU General Public License as published by |
10 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
11 | ;;; your option) any later version. | |
12 | ;;; | |
4155e2a9 | 13 | ;;; GNU Guix is distributed in the hope that it will be useful, but |
62cab99c LC |
14 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of |
15 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
16 | ;;; GNU General Public License for more details. | |
17 | ;;; | |
18 | ;;; You should have received a copy of the GNU General Public License | |
4155e2a9 | 19 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. |
62cab99c LC |
20 | |
21 | (define-module (guix build download) | |
22 | #:use-module (web uri) | |
60fd5122 | 23 | #:use-module (web http) |
76238483 | 24 | #:use-module ((web client) #:hide (open-socket-for-uri)) |
62cab99c | 25 | #:use-module (web response) |
242ad41c | 26 | #:use-module (guix base64) |
62cab99c LC |
27 | #:use-module (guix ftp-client) |
28 | #:use-module (guix build utils) | |
8c348825 | 29 | #:use-module (guix progress) |
62cab99c | 30 | #:use-module (rnrs io ports) |
242ad41c | 31 | #:use-module (rnrs bytevectors) |
94d222ad | 32 | #:use-module (srfi srfi-1) |
62cab99c | 33 | #:use-module (srfi srfi-11) |
9fbe6f19 | 34 | #:use-module (srfi srfi-19) |
94d222ad | 35 | #:use-module (srfi srfi-26) |
bc3c41ce | 36 | #:autoload (ice-9 ftw) (scandir) |
62cab99c | 37 | #:use-module (ice-9 match) |
e47bac79 | 38 | #:use-module (ice-9 format) |
76238483 LC |
39 | #:export (open-socket-for-uri |
40 | open-connection-for-uri | |
347fa4ae | 41 | http-fetch |
b111bcee | 42 | %x509-certificate-directory |
097a951e | 43 | close-connection |
04dec194 | 44 | resolve-uri-reference |
dd8ea244 | 45 | maybe-expand-mirrors |
a3bf0969 | 46 | url-fetch |
a8be7b9a | 47 | byte-count->string |
a8be7b9a | 48 | uri-abbreviation |
cf5e5829 | 49 | nar-uri-abbreviation |
a8be7b9a | 50 | store-path-abbreviation)) |
62cab99c LC |
51 | |
52 | ;;; Commentary: | |
53 | ;;; | |
54 | ;;; Fetch data such as tarballs over HTTP or FTP (builder-side code). | |
55 | ;;; | |
56 | ;;; Code: | |
57 | ||
e7620dc9 LC |
58 | (define %http-receive-buffer-size |
59 | ;; Size of the HTTP receive buffer. | |
60 | 65536) | |
61 | ||
fde17830 LC |
62 | (define* (ellipsis #:optional (port (current-output-port))) |
63 | "Make a rough guess at whether Unicode's HORIZONTAL ELLIPSIS can be written | |
64 | in PORT's encoding, and return either that or ASCII dots." | |
65 | (if (equal? (port-encoding port) "UTF-8") | |
66 | "…" | |
67 | "...")) | |
68 | ||
a8be7b9a | 69 | (define* (store-path-abbreviation store-path #:optional (prefix-length 6)) |
75726135 LC |
70 | "If STORE-PATH is the file name of a store entry, return an abbreviation of |
71 | STORE-PATH for display, showing PREFIX-LENGTH characters of the hash. | |
72 | Otherwise return STORE-PATH." | |
73 | (if (string-prefix? (%store-directory) store-path) | |
74 | (let ((base (basename store-path))) | |
75 | (string-append (string-take base prefix-length) | |
fde17830 | 76 | (ellipsis) |
75726135 LC |
77 | (string-drop base 32))) |
78 | store-path)) | |
a8be7b9a | 79 | |
28e55604 LC |
80 | (define* (uri-abbreviation uri #:optional (max-length 42)) |
81 | "If URI's string representation is larger than MAX-LENGTH, return an | |
82 | abbreviation of URI showing the scheme, host, and basename of the file." | |
83 | (define uri-as-string | |
84 | (uri->string uri)) | |
85 | ||
86 | (define (elide-path) | |
3e31ec82 LC |
87 | (let* ((path (uri-path uri)) |
88 | (base (basename path)) | |
89 | (prefix (string-append (symbol->string (uri-scheme uri)) "://" | |
90 | ||
91 | ;; `file' URIs have no host part. | |
92 | (or (uri-host uri) "") | |
93 | ||
94 | (string-append "/" (ellipsis) "/")))) | |
95 | (if (> (+ (string-length prefix) (string-length base)) max-length) | |
96 | (string-append prefix (ellipsis) | |
97 | (string-drop base (quotient (string-length base) 2))) | |
98 | (string-append prefix base)))) | |
28e55604 LC |
99 | |
100 | (if (> (string-length uri-as-string) max-length) | |
101 | (let ((short (elide-path))) | |
102 | (if (< (string-length short) (string-length uri-as-string)) | |
103 | short | |
104 | uri-as-string)) | |
105 | uri-as-string)) | |
106 | ||
cf5e5829 LC |
107 | (define (nar-uri-abbreviation uri) |
108 | "Abbreviate URI, which is assumed to be the URI of a nar as served by Hydra | |
109 | and 'guix publish', something like | |
110 | \"http://example.org/nar/1ldrllwbna0aw5z8kpci4fsvbd2w8cw4-texlive-bin-2015\"." | |
111 | (let* ((uri (if (string? uri) (string->uri uri) uri)) | |
112 | (path (basename (uri-path uri)))) | |
113 | (if (and (> (string-length path) 33) | |
114 | (char=? (string-ref path 32) #\-)) | |
115 | (string-drop path 33) | |
116 | path))) | |
117 | ||
240a9c69 | 118 | (define* (ftp-fetch uri file #:key timeout print-build-trace?) |
b18ede27 LC |
119 | "Fetch data from URI and write it to FILE. Return FILE on success. Bail |
120 | out if the connection could not be established in less than TIMEOUT seconds." | |
6dfd683d LC |
121 | (let* ((conn (match (and=> (uri-userinfo uri) |
122 | (cut string-split <> #\:)) | |
a4f54234 RJ |
123 | (((? string? user)) |
124 | (ftp-open (uri-host uri) #:timeout timeout | |
125 | #:username user)) | |
126 | (((? string? user) (? string? pass)) | |
127 | (ftp-open (uri-host uri) #:timeout timeout | |
128 | #:username user | |
129 | #:password pass)) | |
130 | (_ (ftp-open (uri-host uri) #:timeout timeout)))) | |
e47bac79 | 131 | (size (false-if-exception (ftp-size conn (uri-path uri)))) |
62cab99c | 132 | (in (ftp-retr conn (basename (uri-path uri)) |
9f860595 LC |
133 | (dirname (uri-path uri)) |
134 | #:timeout timeout))) | |
62cab99c LC |
135 | (call-with-output-file file |
136 | (lambda (out) | |
79864851 SB |
137 | (dump-port* in out |
138 | #:buffer-size %http-receive-buffer-size | |
240a9c69 LC |
139 | #:reporter |
140 | (if print-build-trace? | |
141 | (progress-reporter/trace | |
142 | file (uri->string uri) size) | |
143 | (progress-reporter/file | |
144 | (uri-abbreviation uri) size))))) | |
145 | ||
146 | (ftp-close conn) | |
147 | (unless print-build-trace? | |
148 | (newline)) | |
149 | file)) | |
62cab99c | 150 | |
483f1158 LC |
151 | ;; Autoload GnuTLS so that this module can be used even when GnuTLS is |
152 | ;; not available. At compile time, this yields "possibly unbound | |
153 | ;; variable" warnings, but these are OK: we know that the variables will | |
154 | ;; be bound if we need them, because (guix download) adds GnuTLS as an | |
155 | ;; input in that case. | |
156 | ||
6a7c4636 LC |
157 | (define (load-gnutls) |
158 | ;; XXX: Use this hack instead of #:autoload to avoid compilation errors. | |
159 | ;; See <http://bugs.gnu.org/12202>. | |
058d0251 LC |
160 | (module-use! (resolve-module '(guix build download)) |
161 | (resolve-interface '(gnutls))) | |
6a7c4636 | 162 | (set! load-gnutls (const #t))) |
483f1158 | 163 | |
bc3c41ce LC |
164 | (define %x509-certificate-directory |
165 | ;; The directory where X.509 authority PEM certificates are stored. | |
166 | (make-parameter (or (getenv "GUIX_TLS_CERTIFICATE_DIRECTORY") | |
674e143c LC |
167 | (getenv "SSL_CERT_DIR") ;like OpenSSL |
168 | "/etc/ssl/certs"))) | |
bc3c41ce | 169 | |
27fd13c3 LC |
170 | (define (set-certificate-credentials-x509-trust-file!* cred file format) |
171 | "Like 'set-certificate-credentials-x509-trust-file!', but without the file | |
172 | name decoding bug described at | |
173 | <https://debbugs.gnu.org/cgi/bugreport.cgi?bug=26948#17>." | |
174 | (let ((data (call-with-input-file file get-bytevector-all))) | |
175 | (set-certificate-credentials-x509-trust-data! cred data format))) | |
176 | ||
bc3c41ce LC |
177 | (define (make-credendials-with-ca-trust-files directory) |
178 | "Return certificate credentials with X.509 authority certificates read from | |
179 | DIRECTORY. Those authority certificates are checked when | |
180 | 'peer-certificate-status' is later called." | |
181 | (let ((cred (make-certificate-credentials)) | |
0d78d0f0 LC |
182 | (files (match (scandir directory (cut string-suffix? ".pem" <>)) |
183 | ((or #f ()) | |
184 | ;; Some distros provide nothing but bundles (*.crt) under | |
185 | ;; /etc/ssl/certs, so look for them. | |
186 | (or (scandir directory (cut string-suffix? ".crt" <>)) | |
187 | '())) | |
188 | (pem pem)))) | |
bc3c41ce | 189 | (for-each (lambda (file) |
580deec5 LC |
190 | (let ((file (string-append directory "/" file))) |
191 | ;; Protect against dangling symlinks. | |
192 | (when (file-exists? file) | |
27fd13c3 | 193 | (set-certificate-credentials-x509-trust-file!* |
580deec5 LC |
194 | cred file |
195 | x509-certificate-format/pem)))) | |
0d78d0f0 | 196 | files) |
bc3c41ce LC |
197 | cred)) |
198 | ||
199 | (define (peer-certificate session) | |
200 | "Return the certificate of the remote peer in SESSION." | |
201 | (match (session-peer-certificate-chain session) | |
202 | ((first _ ...) | |
203 | (import-x509-certificate first x509-certificate-format/der)))) | |
204 | ||
205 | (define (assert-valid-server-certificate session server) | |
206 | "Return #t if the certificate of the remote peer for SESSION is a valid | |
207 | certificate for SERVER, where SERVER is the expected host name of peer." | |
208 | (define cert | |
209 | (peer-certificate session)) | |
210 | ||
211 | ;; First check whether the server's certificate matches SERVER. | |
212 | (unless (x509-certificate-matches-hostname? cert server) | |
213 | (throw 'tls-certificate-error 'host-mismatch cert server)) | |
214 | ||
215 | ;; Second check its validity and reachability from the set of authority | |
216 | ;; certificates loaded via 'set-certificate-credentials-x509-trust-file!'. | |
217 | (match (peer-certificate-status session) | |
218 | (() ;certificate is valid | |
219 | #t) | |
220 | ((statuses ...) | |
221 | (throw 'tls-certificate-error 'invalid-certificate cert server | |
222 | statuses)))) | |
223 | ||
224 | (define (print-tls-certificate-error port key args default-printer) | |
225 | "Print the TLS certificate error represented by ARGS in an intelligible | |
226 | way." | |
227 | (match args | |
228 | (('host-mismatch cert server) | |
229 | (format port | |
230 | "X.509 server certificate for '~a' does not match: ~a~%" | |
231 | server (x509-certificate-dn cert))) | |
232 | (('invalid-certificate cert server statuses) | |
233 | (format port | |
234 | "X.509 certificate of '~a' could not be verified:~%~{ ~a~%~}" | |
235 | server | |
236 | (map certificate-status->string statuses))))) | |
237 | ||
238 | (set-exception-printer! 'tls-certificate-error | |
239 | print-tls-certificate-error) | |
240 | ||
241 | (define* (tls-wrap port server #:key (verify-certificate? #t)) | |
077bd18d LC |
242 | "Return PORT wrapped in a TLS connection to SERVER. SERVER must be a DNS |
243 | host name without trailing dot." | |
483f1158 LC |
244 | (define (log level str) |
245 | (format (current-error-port) | |
246 | "gnutls: [~a|~a] ~a" (getpid) level str)) | |
247 | ||
6a7c4636 | 248 | (load-gnutls) |
bc3c41ce LC |
249 | (let ((session (make-session connection-end/client)) |
250 | (ca-certs (%x509-certificate-directory))) | |
077bd18d LC |
251 | |
252 | ;; Some servers such as 'cloud.github.com' require the client to support | |
253 | ;; the 'SERVER NAME' extension. However, 'set-session-server-name!' is | |
254 | ;; not available in older GnuTLS releases. See | |
255 | ;; <http://bugs.gnu.org/18526> for details. | |
256 | (if (module-defined? (resolve-interface '(gnutls)) | |
257 | 'set-session-server-name!) | |
258 | (set-session-server-name! session server-name-type/dns server) | |
259 | (format (current-error-port) | |
260 | "warning: TLS 'SERVER NAME' extension not supported~%")) | |
261 | ||
483f1158 LC |
262 | (set-session-transport-fd! session (fileno port)) |
263 | (set-session-default-priority! session) | |
967ee481 LC |
264 | |
265 | ;; The "%COMPAT" bit allows us to work around firewall issues (info | |
266 | ;; "(gnutls) Priority Strings"); see <http://bugs.gnu.org/23311>. | |
267 | ;; Explicitly disable SSLv3, which is insecure: | |
268 | ;; <https://tools.ietf.org/html/rfc7568>. | |
621fb83a | 269 | (set-session-priorities! session "NORMAL:%COMPAT:-VERS-SSL3.0") |
967ee481 | 270 | |
bc3c41ce LC |
271 | (set-session-credentials! session |
272 | (if (and verify-certificate? ca-certs) | |
273 | (make-credendials-with-ca-trust-files | |
274 | ca-certs) | |
275 | (make-certificate-credentials))) | |
483f1158 LC |
276 | |
277 | ;; Uncomment the following lines in case of debugging emergency. | |
278 | ;;(set-log-level! 10) | |
279 | ;;(set-log-procedure! log) | |
280 | ||
7b9ac883 LC |
281 | (catch 'gnutls-error |
282 | (lambda () | |
283 | (handshake session)) | |
284 | (lambda (key err proc . rest) | |
285 | (cond ((eq? err error/warning-alert-received) | |
286 | ;; Like Wget, do no stop upon non-fatal alerts such as | |
287 | ;; 'alert-description/unrecognized-name'. | |
288 | (format (current-error-port) | |
289 | "warning: TLS warning alert received: ~a~%" | |
290 | (alert-description->string (alert-get session))) | |
291 | (handshake session)) | |
292 | (else | |
293 | ;; XXX: We'd use 'gnutls_error_is_fatal' but (gnutls) doesn't | |
294 | ;; provide a binding for this. | |
295 | (apply throw key err proc rest))))) | |
bc3c41ce LC |
296 | |
297 | ;; Verify the server's certificate if needed. | |
298 | (when verify-certificate? | |
299 | (catch 'tls-certificate-error | |
300 | (lambda () | |
301 | (assert-valid-server-certificate session server)) | |
302 | (lambda args | |
303 | (close-port port) | |
304 | (apply throw args)))) | |
305 | ||
dd9afe64 | 306 | (let ((record (session-record-port session))) |
f4cde9ac | 307 | (define (read! bv start count) |
279d932b LC |
308 | (let ((read (get-bytevector-n! record bv start count))) |
309 | (if (eof-object? read) | |
310 | 0 | |
311 | read))) | |
f4cde9ac LC |
312 | (define (write! bv start count) |
313 | (put-bytevector record bv start count) | |
314 | (force-output record) | |
315 | count) | |
316 | (define (get-position) | |
317 | (port-position record)) | |
318 | (define (set-position! new-position) | |
319 | (set-port-position! record new-position)) | |
320 | (define (close) | |
321 | (unless (port-closed? port) | |
322 | (close-port port)) | |
323 | (unless (port-closed? record) | |
324 | (close-port record))) | |
325 | ||
b168acae LC |
326 | (define (unbuffered port) |
327 | (setvbuf port 'none) | |
328 | port) | |
329 | ||
f4cde9ac LC |
330 | (setvbuf record 'block) |
331 | ||
332 | ;; Return a port that wraps RECORD to ensure that closing it also | |
333 | ;; closes PORT, the actual socket port, and its file descriptor. | |
b168acae LC |
334 | ;; Make sure it does not introduce extra buffering (custom ports |
335 | ;; are buffered by default as of Guile 3.0.5). | |
f4cde9ac LC |
336 | ;; XXX: This wrapper would be unnecessary if GnuTLS could |
337 | ;; automatically close SESSION's file descriptor when RECORD is | |
338 | ;; closed, but that doesn't seem to be possible currently (as of | |
339 | ;; 3.6.9). | |
b168acae LC |
340 | (unbuffered |
341 | (make-custom-binary-input/output-port "gnutls wrapped port" read! write! | |
342 | get-position set-position! | |
343 | close))))) | |
483f1158 | 344 | |
60fd5122 LC |
345 | (define (ensure-uri uri-or-string) ;XXX: copied from (web http) |
346 | (cond | |
347 | ((string? uri-or-string) (string->uri uri-or-string)) | |
348 | ((uri? uri-or-string) uri-or-string) | |
349 | (else (error "Invalid URI" uri-or-string)))) | |
350 | ||
60fd5122 LC |
351 | (define* (open-socket-for-uri uri-or-string #:key timeout) |
352 | "Return an open input/output port for a connection to URI. When TIMEOUT is | |
353 | not #f, it must be a (possibly inexact) number denoting the maximum duration | |
354 | in seconds to wait for the connection to complete; passed TIMEOUT, an | |
355 | ETIMEDOUT error is raised." | |
356 | ;; Includes a fix for <http://bugs.gnu.org/15368> which affects Guile's | |
1b9aefa3 LC |
357 | ;; 'open-socket-for-uri' up to 2.0.11 included, uses 'connect*' instead |
358 | ;; of 'connect', and uses AI_ADDRCONFIG. | |
60fd5122 LC |
359 | |
360 | (define http-proxy (current-http-proxy)) | |
361 | (define uri (ensure-uri (or http-proxy uri-or-string))) | |
362 | (define addresses | |
363 | (let ((port (uri-port uri))) | |
364 | (delete-duplicates | |
365 | (getaddrinfo (uri-host uri) | |
366 | (cond (port => number->string) | |
367 | (else (symbol->string (uri-scheme uri)))) | |
1b9aefa3 LC |
368 | (if (number? port) |
369 | (logior AI_ADDRCONFIG AI_NUMERICSERV) | |
370 | AI_ADDRCONFIG)) | |
60fd5122 LC |
371 | (lambda (ai1 ai2) |
372 | (equal? (addrinfo:addr ai1) (addrinfo:addr ai2)))))) | |
373 | ||
374 | (let loop ((addresses addresses)) | |
375 | (let* ((ai (car addresses)) | |
376 | (s (with-fluids ((%default-port-encoding #f)) | |
377 | ;; Restrict ourselves to TCP. | |
378 | (socket (addrinfo:fam ai) SOCK_STREAM IPPROTO_IP)))) | |
379 | (catch 'system-error | |
380 | (lambda () | |
381 | (connect* s (addrinfo:addr ai) timeout) | |
382 | ||
383 | ;; Buffer input and output on this port. | |
76832d34 | 384 | (setvbuf s 'block) |
60fd5122 LC |
385 | ;; If we're using a proxy, make a note of that. |
386 | (when http-proxy (set-http-proxy-port?! s #t)) | |
387 | s) | |
388 | (lambda args | |
389 | ;; Connection failed, so try one of the other addresses. | |
390 | (close s) | |
391 | (if (null? (cdr addresses)) | |
392 | (apply throw args) | |
393 | (loop (cdr addresses)))))))) | |
394 | ||
9bc8175c SB |
395 | (define (setup-http-tunnel port uri) |
396 | "Establish over PORT an HTTP tunnel to the destination server of URI." | |
397 | (define target | |
398 | (string-append (uri-host uri) ":" | |
399 | (number->string | |
400 | (or (uri-port uri) | |
401 | (match (uri-scheme uri) | |
402 | ('http 80) | |
403 | ('https 443)))))) | |
404 | (format port "CONNECT ~a HTTP/1.1\r\n" target) | |
405 | (format port "Host: ~a\r\n\r\n" target) | |
406 | (force-output port) | |
407 | (read-response port)) | |
408 | ||
bc3c41ce LC |
409 | (define* (open-connection-for-uri uri |
410 | #:key | |
411 | timeout | |
412 | (verify-certificate? #t)) | |
097a951e | 413 | "Like 'open-socket-for-uri', but also handle HTTPS connections. The |
bc3c41ce LC |
414 | resulting port must be closed with 'close-connection'. When |
415 | VERIFY-CERTIFICATE? is true, verify HTTPS server certificates." | |
4fd06a4d LC |
416 | ;; Note: Guile 2.2.0's (web client) has a same-named export that's actually |
417 | ;; undefined. See Guile commit 011669af3b428e5626f7bbf66b11d57d9768c047. | |
418 | ||
d17551d9 LC |
419 | (define https? |
420 | (eq? 'https (uri-scheme uri))) | |
421 | ||
9bc8175c SB |
422 | (define https-proxy (let ((proxy (getenv "https_proxy"))) |
423 | (and (not (equal? proxy "")) | |
424 | proxy))) | |
425 | ||
d17551d9 LC |
426 | (let-syntax ((with-https-proxy |
427 | (syntax-rules () | |
428 | ((_ exp) | |
429 | ;; For HTTPS URIs, honor 'https_proxy', not 'http_proxy'. | |
d17551d9 LC |
430 | (let ((thunk (lambda () exp))) |
431 | (if (and https? | |
432 | (module-variable | |
433 | (resolve-interface '(web client)) | |
434 | 'current-http-proxy)) | |
9bc8175c | 435 | (parameterize ((current-http-proxy https-proxy)) |
d17551d9 LC |
436 | (thunk)) |
437 | (thunk))))))) | |
438 | (with-https-proxy | |
60fd5122 | 439 | (let ((s (open-socket-for-uri uri #:timeout timeout))) |
c822fb8e | 440 | ;; Buffer input and output on this port. |
76832d34 | 441 | (setvbuf s 'block %http-receive-buffer-size) |
c822fb8e | 442 | |
9bc8175c SB |
443 | (when (and https? https-proxy) |
444 | (setup-http-tunnel s uri)) | |
445 | ||
d17551d9 | 446 | (if https? |
bc3c41ce LC |
447 | (tls-wrap s (uri-host uri) |
448 | #:verify-certificate? verify-certificate?) | |
d17551d9 | 449 | s))))) |
62cab99c | 450 | |
f4cde9ac | 451 | (define (close-connection port) ;deprecated |
097a951e | 452 | (unless (port-closed? port) |
f4cde9ac | 453 | (close-port port))) |
097a951e | 454 | |
62cab99c LC |
455 | ;; XXX: This is an awful hack to make sure the (set-port-encoding! p |
456 | ;; "ISO-8859-1") call in `read-response' passes, even during bootstrap | |
457 | ;; where iconv is not available. | |
458 | (module-define! (resolve-module '(web response)) | |
459 | 'set-port-encoding! | |
460 | (lambda (p e) #f)) | |
461 | ||
04dec194 MW |
462 | (define (resolve-uri-reference ref base) |
463 | "Resolve the URI reference REF, interpreted relative to the BASE URI, into a | |
464 | target URI, according to the algorithm specified in RFC 3986 section 5.2.2. | |
465 | Return the resulting target URI." | |
466 | ||
467 | (define (merge-paths base-path rel-path) | |
468 | (let* ((base-components (string-split base-path #\/)) | |
469 | (base-directory-components (match base-components | |
470 | ((components ... last) components) | |
471 | (() '()))) | |
472 | (base-directory (string-join base-directory-components "/"))) | |
473 | (string-append base-directory "/" rel-path))) | |
474 | ||
475 | (define (remove-dot-segments path) | |
476 | (let loop ((in | |
477 | ;; Drop leading "." and ".." components from a relative path. | |
478 | ;; (absolute paths will start with a "" component) | |
479 | (drop-while (match-lambda | |
480 | ((or "." "..") #t) | |
481 | (_ #f)) | |
482 | (string-split path #\/))) | |
483 | (out '())) | |
484 | (match in | |
485 | (("." . rest) | |
486 | (loop rest out)) | |
487 | ((".." . rest) | |
488 | (match out | |
489 | ((or () ("")) | |
490 | (error "remove-dot-segments: too many '..' components" path)) | |
491 | (_ | |
492 | (loop rest (cdr out))))) | |
493 | ((component . rest) | |
494 | (loop rest (cons component out))) | |
495 | (() | |
496 | (string-join (reverse out) "/"))))) | |
497 | ||
498 | (cond ((or (uri-scheme ref) | |
499 | (uri-host ref)) | |
500 | (build-uri (or (uri-scheme ref) | |
501 | (uri-scheme base)) | |
502 | #:userinfo (uri-userinfo ref) | |
503 | #:host (uri-host ref) | |
504 | #:port (uri-port ref) | |
505 | #:path (remove-dot-segments (uri-path ref)) | |
506 | #:query (uri-query ref) | |
507 | #:fragment (uri-fragment ref))) | |
508 | ((string-null? (uri-path ref)) | |
509 | (build-uri (uri-scheme base) | |
510 | #:userinfo (uri-userinfo base) | |
511 | #:host (uri-host base) | |
512 | #:port (uri-port base) | |
513 | #:path (remove-dot-segments (uri-path base)) | |
514 | #:query (or (uri-query ref) | |
515 | (uri-query base)) | |
516 | #:fragment (uri-fragment ref))) | |
517 | (else | |
518 | (build-uri (uri-scheme base) | |
519 | #:userinfo (uri-userinfo base) | |
520 | #:host (uri-host base) | |
521 | #:port (uri-port base) | |
522 | #:path (remove-dot-segments | |
523 | (if (string-prefix? "/" (uri-path ref)) | |
524 | (uri-path ref) | |
525 | (merge-paths (uri-path base) | |
526 | (uri-path ref)))) | |
527 | #:query (uri-query ref) | |
528 | #:fragment (uri-fragment ref))))) | |
529 | ||
347fa4ae LC |
530 | (define* (http-fetch uri #:key timeout (verify-certificate? #t)) |
531 | "Return an input port containing the data at URI, and the expected number of | |
532 | bytes available or #f. When TIMEOUT is true, bail out if the connection could | |
533 | not be established in less than TIMEOUT seconds. When VERIFY-CERTIFICATE? is | |
534 | true, verify HTTPS certificates; otherwise simply ignore them." | |
62cab99c | 535 | |
2de227af | 536 | (define headers |
242ad41c | 537 | `(;; Some web sites, such as http://dist.schmorp.de, would block you if |
38bf090e LC |
538 | ;; there's no 'User-Agent' header, presumably on the assumption that |
539 | ;; you're a spammer. So work around that. | |
540 | (User-Agent . "GNU Guile") | |
541 | ||
542 | ;; Some servers, such as https://alioth.debian.org, return "406 Not | |
543 | ;; Acceptable" when not explicitly told that everything is accepted. | |
242ad41c DT |
544 | (Accept . "*/*") |
545 | ||
546 | ;; Basic authentication, if needed. | |
547 | ,@(match (uri-userinfo uri) | |
548 | ((? string? str) | |
549 | `((Authorization . ,(string-append "Basic " | |
550 | (base64-encode | |
551 | (string->utf8 str)))))) | |
552 | (_ '())))) | |
2de227af | 553 | |
62cab99c | 554 | (let*-values (((connection) |
bc3c41ce LC |
555 | (open-connection-for-uri uri |
556 | #:timeout timeout | |
557 | #:verify-certificate? | |
558 | verify-certificate?)) | |
b3ac341d | 559 | ((resp port) |
36626c55 LC |
560 | (http-get uri #:port connection #:decode-body? #f |
561 | #:streaming? #t | |
562 | #:headers headers)) | |
62cab99c | 563 | ((code) |
347fa4ae | 564 | (response-code resp))) |
270246de LC |
565 | (case code |
566 | ((200) ; OK | |
347fa4ae | 567 | (values port (response-content-length resp))) |
67158a49 | 568 | ((301 ; moved permanently |
82fd23b8 | 569 | 302 ; found (redirection) |
57d28987 TGR |
570 | 303 ; see other |
571 | 307 ; temporary redirection | |
572 | 308) ; permanent redirection | |
04dec194 | 573 | (let ((uri (resolve-uri-reference (response-location resp) uri))) |
270246de LC |
574 | (format #t "following redirection to `~a'...~%" |
575 | (uri->string uri)) | |
576 | (close connection) | |
347fa4ae | 577 | (http-fetch uri |
bc3c41ce LC |
578 | #:timeout timeout |
579 | #:verify-certificate? verify-certificate?))) | |
270246de LC |
580 | (else |
581 | (error "download failed" (uri->string uri) | |
582 | code (response-reason-phrase resp)))))) | |
62cab99c LC |
583 | |
584 | \f | |
585 | (define-syntax-rule (false-if-exception* body ...) | |
586 | "Like `false-if-exception', but print the exception on the error port." | |
587 | (catch #t | |
588 | (lambda () | |
589 | body ...) | |
590 | (lambda (key . args) | |
591 | #f) | |
592 | (lambda (key . args) | |
593 | (print-exception (current-error-port) #f key args)))) | |
594 | ||
dd8ea244 LC |
595 | (define (uri-vicinity dir file) |
596 | "Concatenate DIR, slash, and FILE, keeping only one slash in between. | |
597 | This is required by some HTTP servers." | |
598 | (string-append (string-trim-right dir #\/) "/" | |
599 | (string-trim file #\/))) | |
600 | ||
601 | (define (maybe-expand-mirrors uri mirrors) | |
602 | "If URI uses the 'mirror' scheme, expand it according to the MIRRORS alist. | |
603 | Return a list of URIs." | |
604 | (case (uri-scheme uri) | |
605 | ((mirror) | |
606 | (let ((kind (string->symbol (uri-host uri))) | |
607 | (path (uri-path uri))) | |
608 | (match (assoc-ref mirrors kind) | |
609 | ((mirrors ..1) | |
610 | (map (compose string->uri (cut uri-vicinity <> path)) | |
611 | mirrors)) | |
612 | (_ | |
613 | (error "unsupported URL mirror kind" kind uri))))) | |
614 | (else | |
615 | (list uri)))) | |
616 | ||
cd436bf0 LC |
617 | (define* (url-fetch url file |
618 | #:key | |
bc3c41ce | 619 | (timeout 10) (verify-certificate? #t) |
cd436bf0 | 620 | (mirrors '()) (content-addressed-mirrors '()) |
240a9c69 LC |
621 | (hashes '()) |
622 | print-build-trace?) | |
62cab99c LC |
623 | "Fetch FILE from URL; URL may be either a single string, or a list of |
624 | string denoting alternate URLs for FILE. Return #f on failure, and FILE | |
cd436bf0 LC |
625 | on success. |
626 | ||
627 | When MIRRORS is defined, it must be an alist of mirrors; it is used to resolve | |
628 | 'mirror://' URIs. | |
629 | ||
630 | HASHES must be a list of algorithm/hash pairs, where each algorithm is a | |
631 | symbol such as 'sha256 and each hash is a bytevector. | |
632 | CONTENT-ADDRESSED-MIRRORS must be a list of procedures that, given a hash | |
633 | algorithm and a hash, return a URL where the specified data can be retrieved | |
bc3c41ce LC |
634 | or #f. |
635 | ||
636 | When VERIFY-CERTIFICATE? is true, validate HTTPS server certificates; | |
637 | otherwise simply ignore them." | |
62cab99c | 638 | (define uri |
dd8ea244 | 639 | (append-map (cut maybe-expand-mirrors <> mirrors) |
94d222ad LC |
640 | (match url |
641 | ((_ ...) (map string->uri url)) | |
642 | (_ (list (string->uri url)))))) | |
62cab99c LC |
643 | |
644 | (define (fetch uri file) | |
94628828 | 645 | (format #t "~%Starting download of ~a~%From ~a...~%" |
62cab99c LC |
646 | file (uri->string uri)) |
647 | (case (uri-scheme uri) | |
483f1158 | 648 | ((http https) |
347fa4ae LC |
649 | (false-if-exception* |
650 | (let-values (((port size) | |
651 | (http-fetch uri | |
652 | #:verify-certificate? verify-certificate? | |
653 | #:timeout timeout))) | |
654 | (call-with-output-file file | |
655 | (lambda (output) | |
656 | (dump-port* port output | |
657 | #:buffer-size %http-receive-buffer-size | |
240a9c69 LC |
658 | #:reporter (if print-build-trace? |
659 | (progress-reporter/trace | |
660 | file (uri->string uri) size) | |
661 | (progress-reporter/file | |
662 | (uri-abbreviation uri) size))) | |
347fa4ae | 663 | (newline))) |
8fbc1a22 | 664 | file))) |
483f1158 | 665 | ((ftp) |
bc3c41ce | 666 | (false-if-exception* (ftp-fetch uri file |
240a9c69 LC |
667 | #:timeout timeout |
668 | #:print-build-trace? | |
669 | print-build-trace?))) | |
62cab99c LC |
670 | (else |
671 | (format #t "skipping URI with unsupported scheme: ~s~%" | |
672 | uri) | |
673 | #f))) | |
674 | ||
dab2472c | 675 | (define content-addressed-uris |
cd436bf0 LC |
676 | (append-map (lambda (make-url) |
677 | (filter-map (match-lambda | |
678 | ((hash-algo . hash) | |
38f1cf8a LC |
679 | (let ((file (strip-store-file-name file))) |
680 | (string->uri (make-url file hash-algo hash))))) | |
cd436bf0 LC |
681 | hashes)) |
682 | content-addressed-mirrors)) | |
683 | ||
76832d34 | 684 | ;; Make this unbuffered so 'progress-report/file' works as expected. 'line |
79864851 | 685 | ;; means '\n', not '\r', so it's not appropriate here. |
76832d34 | 686 | (setvbuf (current-output-port) 'none) |
395bea2a | 687 | |
76832d34 | 688 | (setvbuf (current-error-port) 'line) |
62cab99c | 689 | |
dab2472c | 690 | (let try ((uri (append uri content-addressed-uris))) |
62cab99c LC |
691 | (match uri |
692 | ((uri tail ...) | |
693 | (or (fetch uri file) | |
694 | (try tail))) | |
695 | (() | |
696 | (format (current-error-port) "failed to download ~s from ~s~%" | |
697 | file url) | |
4a6ec23a LC |
698 | |
699 | ;; Remove FILE in case we made an incomplete download, for example due | |
700 | ;; to ENOSPC. | |
701 | (catch 'system-error | |
702 | (lambda () | |
703 | (delete-file file)) | |
704 | (const #f)) | |
62cab99c LC |
705 | #f)))) |
706 | ||
707 | ;;; download.scm ends here |