svn-download: Pass multi-fetch parameters through environment variables.
[jackhill/guix/guix.git] / guix / swh.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2018, 2019, 2020, 2021 Ludovic Courtès <ludo@gnu.org>
3 ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
4 ;;; Copyright © 2021 Xinglu Chen <public@yoctocell.xyz>
5 ;;; Copyright © 2021 Simon Tournier <zimon.toutoune@gmail.com>
6 ;;;
7 ;;; This file is part of GNU Guix.
8 ;;;
9 ;;; GNU Guix is free software; you can redistribute it and/or modify it
10 ;;; under the terms of the GNU General Public License as published by
11 ;;; the Free Software Foundation; either version 3 of the License, or (at
12 ;;; your option) any later version.
13 ;;;
14 ;;; GNU Guix is distributed in the hope that it will be useful, but
15 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
16 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 ;;; GNU General Public License for more details.
18 ;;;
19 ;;; You should have received a copy of the GNU General Public License
20 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
21
22 (define-module (guix swh)
23 #:use-module (guix base16)
24 #:use-module (guix build utils)
25 #:use-module ((guix build syscalls) #:select (mkdtemp!))
26 #:use-module (web uri)
27 #:use-module (web client)
28 #:use-module (web response)
29 #:use-module (json)
30 #:use-module (srfi srfi-1)
31 #:use-module (srfi srfi-9)
32 #:use-module (srfi srfi-11)
33 #:use-module (srfi srfi-19)
34 #:use-module (ice-9 match)
35 #:use-module (ice-9 regex)
36 #:use-module (ice-9 popen)
37 #:use-module ((ice-9 ftw) #:select (scandir))
38 #:export (%swh-base-url
39 %verify-swh-certificate?
40 %allow-request?
41
42 request-rate-limit-reached?
43
44 origin?
45 origin-type
46 origin-url
47 origin-visits
48 lookup-origin
49
50 visit?
51 visit-date
52 visit-origin
53 visit-url
54 visit-snapshot-url
55 visit-status
56 visit-number
57 visit-snapshot
58
59 snapshot?
60 snapshot-id
61 snapshot-branches
62 lookup-snapshot-branch
63
64 branch?
65 branch-name
66 branch-target
67
68 release?
69 release-id
70 release-name
71 release-message
72 release-target
73
74 revision?
75 revision-id
76 revision-date
77 revision-directory
78 lookup-revision
79 lookup-origin-revision
80
81 content?
82 content-checksums
83 content-data-url
84 content-length
85 lookup-content
86
87 directory-entry?
88 directory-entry-name
89 directory-entry-type
90 directory-entry-checksums
91 directory-entry-length
92 directory-entry-permissions
93 lookup-directory
94 directory-entry-target
95
96 save-reply?
97 save-reply-origin-url
98 save-reply-origin-type
99 save-reply-request-date
100 save-reply-request-status
101 save-reply-task-status
102 save-origin
103 save-origin-status
104
105 vault-reply?
106 vault-reply-id
107 vault-reply-fetch-url
108 vault-reply-progress-message
109 vault-reply-status
110 vault-reply-swhid
111 query-vault
112 request-cooking
113 vault-fetch
114
115 commit-id?
116
117 swh-download-directory
118 swh-download))
119
120 ;;; Commentary:
121 ;;;
122 ;;; This module provides bindings to the HTTP interface of Software Heritage.
123 ;;; It allows you to browse the archive, look up revisions (such as SHA1
124 ;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See
125 ;;; <https://archive.softwareheritage.org/api/> for more information.
126 ;;;
127 ;;; The high-level 'swh-download' procedure allows you to download a Git
128 ;;; revision from Software Heritage, provided it is available.
129 ;;;
130 ;;; Code:
131
132 (define %swh-base-url
133 ;; Presumably we won't need to change it.
134 (make-parameter "https://archive.softwareheritage.org"))
135
136 (define %verify-swh-certificate?
137 ;; Whether to verify the X.509 HTTPS certificate for %SWH-BASE-URL.
138 (make-parameter #t))
139
140 ;; Token from an account to the Software Heritage Authentication service
141 ;; <https://archive.softwareheritage.org/api/>
142 (define %swh-token
143 (make-parameter (and=> (getenv "GUIX_SWH_TOKEN")
144 string->symbol)))
145
146 (define (swh-url path . rest)
147 ;; URLs returned by the API may be relative or absolute. This has changed
148 ;; without notice before. Handle both cases by detecting whether the path
149 ;; starts with a domain.
150 (define root
151 (if (string-prefix? "/" path)
152 (string-append (%swh-base-url) path)
153 path))
154
155 (define url
156 (string-append root (string-join rest "/" 'prefix)))
157
158 ;; Ensure there's a trailing slash or we get a redirect.
159 (if (string-suffix? "/" url)
160 url
161 (string-append url "/")))
162
163 ;; XXX: Work around a bug in Guile 3.0.2 where #:verify-certificate? would
164 ;; be ignored (<https://bugs.gnu.org/40486>).
165 (define* (http-get* uri #:rest rest)
166 (apply http-request uri #:method 'GET rest))
167 (define* (http-post* uri #:rest rest)
168 (apply http-request uri #:method 'POST rest))
169
170 (define %date-regexp
171 ;; Match strings like "2014-11-17T22:09:38+01:00" or
172 ;; "2018-09-30T23:20:07.815449+00:00"".
173 (make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$"))
174
175 (define (string->date* str)
176 "Return a SRFI-19 date parsed from STR, a date string as returned by
177 Software Heritage."
178 ;; We can't use 'string->date' because of the timezone format: SWH returns
179 ;; "+01:00" when the '~z' template expects "+0100". So we roll our own!
180 (or (and=> (regexp-exec %date-regexp str)
181 (lambda (match)
182 (define (ref n)
183 (string->number (match:substring match n)))
184
185 (make-date (let ((ns (match:substring match 8)))
186 (if ns
187 (string->number (string-drop ns 1))
188 0))
189 (ref 6) (ref 5) (ref 4)
190 (ref 3) (ref 2) (ref 1)
191 (+ (* 3600 (ref 9)) ;time zone
192 (if (< (ref 9) 0)
193 (- (ref 10))
194 (ref 10))))))
195 str)) ;oops!
196
197 (define (maybe-null proc)
198 (match-lambda
199 ((? null?) #f)
200 ('null #f)
201 (obj (proc obj))))
202
203 (define string*
204 ;; Converts "string or #nil" coming from JSON to "string or #f".
205 (match-lambda
206 ((? string? str) str)
207 ((? null?) #f) ;Guile-JSON 3.x
208 ('null #f))) ;Guile-JSON 4.x
209
210 (define %allow-request?
211 ;; Takes a URL and method (e.g., the 'http-get' procedure) and returns true
212 ;; to keep going. This can be used to disallow requests when
213 ;; 'request-rate-limit-reached?' returns true, for instance.
214 (make-parameter (const #t)))
215
216 ;; The time when the rate limit for "/origin/save" POST requests and that of
217 ;; other requests will be reset.
218 ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
219 (define %save-rate-limit-reset-time 0)
220 (define %general-rate-limit-reset-time 0)
221
222 (define (request-rate-limit-reached? url method)
223 "Return true if the rate limit has been reached for URI."
224 (define uri
225 (string->uri url))
226
227 (define reset-time
228 (if (and (eq? method http-post*)
229 (string-prefix? "/api/1/origin/save/" (uri-path uri)))
230 %save-rate-limit-reset-time
231 %general-rate-limit-reset-time))
232
233 (< (car (gettimeofday)) reset-time))
234
235 (define (update-rate-limit-reset-time! url method response)
236 "Update the rate limit reset time for URL and METHOD based on the headers in
237 RESPONSE."
238 (let ((uri (string->uri url)))
239 (match (assq-ref (response-headers response) 'x-ratelimit-reset)
240 ((= string->number (? number? reset))
241 (if (and (eq? method http-post*)
242 (string-prefix? "/api/1/origin/save/" (uri-path uri)))
243 (set! %save-rate-limit-reset-time reset)
244 (set! %general-rate-limit-reset-time reset)))
245 (_
246 #f))))
247
248 (define* (call url decode #:optional (method http-get*)
249 #:key (false-if-404? #t))
250 "Invoke the endpoint at URL using METHOD. Decode the resulting JSON body
251 using DECODE, a one-argument procedure that takes an input port. When
252 FALSE-IF-404? is true, return #f upon 404 responses."
253 (and ((%allow-request?) url method)
254 (let*-values (((response port)
255 (method url #:streaming? #t
256 #:headers
257 (if (%swh-token)
258 `((authorization . (Bearer ,(%swh-token))))
259 '())
260 #:verify-certificate?
261 (%verify-swh-certificate?))))
262 ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
263 (match (assq-ref (response-headers response) 'x-ratelimit-remaining)
264 (#f #t)
265 ((? (compose zero? string->number))
266 (update-rate-limit-reset-time! url method response)
267 (throw 'swh-error url method response))
268 (_ #t))
269
270 (cond ((= 200 (response-code response))
271 (let ((result (decode port)))
272 (close-port port)
273 result))
274 ((and false-if-404?
275 (= 404 (response-code response)))
276 (close-port port)
277 #f)
278 (else
279 (close-port port)
280 (throw 'swh-error url method response))))))
281
282 (define-syntax define-query
283 (syntax-rules (path)
284 "Define a procedure that performs a Software Heritage query."
285 ((_ (name args ...) docstring (path components ...)
286 json->value)
287 (define (name args ...)
288 docstring
289 (call (swh-url components ...) json->value)))))
290
291 ;; <https://archive.softwareheritage.org/api/1/origin/https://github.com/guix-mirror/guix/get>
292 (define-json-mapping <origin> make-origin origin?
293 json->origin
294 (visits-url origin-visits-url "origin_visits_url")
295 (type origin-type)
296 (url origin-url))
297
298 ;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/>
299 (define-json-mapping <visit> make-visit visit?
300 json->visit
301 (date visit-date "date" string->date*)
302 (origin visit-origin)
303 (url visit-url "origin_visit_url")
304 (snapshot-url visit-snapshot-url "snapshot_url" string*) ;string | #f
305 (status visit-status "status" string->symbol) ;'full | 'partial | 'ongoing
306 (number visit-number "visit"))
307
308 ;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
309 (define-json-mapping <snapshot> make-snapshot snapshot?
310 json->snapshot
311 (id snapshot-id)
312 (branches snapshot-branches "branches" json->branches))
313
314 ;; This is used for the "branches" field of snapshots.
315 (define-record-type <branch>
316 (make-branch name target-type target-url)
317 branch?
318 (name branch-name)
319 (target-type branch-target-type) ;release | revision
320 (target-url branch-target-url))
321
322 (define (json->branches branches)
323 (map (match-lambda
324 ((key . value)
325 (make-branch key
326 (string->symbol
327 (assoc-ref value "target_type"))
328 (assoc-ref value "target_url"))))
329 branches))
330
331 ;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
332 (define-json-mapping <release> make-release release?
333 json->release
334 (id release-id)
335 (name release-name)
336 (message release-message)
337 (target-type release-target-type "target_type" string->symbol)
338 (target-url release-target-url "target_url"))
339
340 ;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/>
341 ;; Note: Some revisions, such as those for "nixguix" origins (e.g.,
342 ;; <https://archive.softwareheritage.org/api/1/revision/b8dbc65475bbedde8e015d4730ade8864c38fad3/>),
343 ;; have their 'date' field set to null.
344 (define-json-mapping <revision> make-revision revision?
345 json->revision
346 (id revision-id)
347 (date revision-date "date" (maybe-null string->date*))
348 (directory revision-directory)
349 (directory-url revision-directory-url "directory_url"))
350
351 ;; <https://archive.softwareheritage.org/api/1/content/>
352 (define-json-mapping <content> make-content content?
353 json->content
354 (checksums content-checksums "checksums" json->checksums)
355 (data-url content-data-url "data_url")
356 (file-type-url content-file-type-url "filetype_url")
357 (language-url content-language-url "language_url")
358 (length content-length)
359 (license-url content-license-url "license_url"))
360
361 (define (json->checksums checksums)
362 (map (match-lambda
363 ((key . value)
364 (cons key (base16-string->bytevector value))))
365 checksums))
366
367 ;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
368 (define-json-mapping <directory-entry> make-directory-entry directory-entry?
369 json->directory-entry
370 (name directory-entry-name)
371 (type directory-entry-type "type"
372 (match-lambda
373 ("dir" 'directory)
374 (str (string->symbol str))))
375 (checksums directory-entry-checksums "checksums"
376 (match-lambda
377 (#f #f)
378 ((? unspecified?) #f)
379 (lst (json->checksums lst))))
380 (id directory-entry-id "dir_id")
381 (length directory-entry-length)
382 (permissions directory-entry-permissions "perms")
383 (target-url directory-entry-target-url "target_url"))
384
385 ;; <https://archive.softwareheritage.org/api/1/origin/save/>
386 (define-json-mapping <save-reply> make-save-reply save-reply?
387 json->save-reply
388 (origin-url save-reply-origin-url "origin_url")
389 (origin-type save-reply-origin-type "origin_type")
390 (request-date save-reply-request-date "save_request_date"
391 string->date*)
392 (request-status save-reply-request-status "save_request_status"
393 string->symbol)
394 (task-status save-reply-task-status "save_task_status"
395 (match-lambda
396 ("not created" 'not-created)
397 ((? string? str) (string->symbol str)))))
398
399 ;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref>
400 (define-json-mapping <vault-reply> make-vault-reply vault-reply?
401 json->vault-reply
402 (id vault-reply-id)
403 (fetch-url vault-reply-fetch-url "fetch_url")
404 (progress-message vault-reply-progress-message "progress_message")
405 (status vault-reply-status "status" string->symbol)
406 (swhid vault-reply-swhid))
407
408 \f
409 ;;;
410 ;;; RPCs.
411 ;;;
412
413 (define-query (lookup-origin url)
414 "Return an origin for URL."
415 (path "/api/1/origin" url "get")
416 json->origin)
417
418 (define-query (lookup-content hash type)
419 "Return a content for HASH, of the given TYPE--e.g., \"sha256\"."
420 (path "/api/1/content"
421 (string-append type ":"
422 (bytevector->base16-string hash)))
423 json->content)
424
425 (define-query (lookup-revision id)
426 "Return the revision with the given ID, typically a Git commit SHA1."
427 (path "/api/1/revision" id)
428 json->revision)
429
430 (define-query (lookup-directory id)
431 "Return the directory with the given ID."
432 (path "/api/1/directory" id)
433 json->directory-entries)
434
435 (define (json->directory-entries port)
436 (map json->directory-entry
437 (vector->list (json->scm port))))
438
439 (define (origin-visits origin)
440 "Return the list of visits of ORIGIN, a record as returned by
441 'lookup-origin'."
442 (call (swh-url (origin-visits-url origin))
443 (lambda (port)
444 (map json->visit (vector->list (json->scm port))))))
445
446 (define (visit-snapshot visit)
447 "Return the snapshot corresponding to VISIT or #f if no snapshot is
448 available."
449 (and (visit-snapshot-url visit)
450 (call (swh-url (visit-snapshot-url visit))
451 json->snapshot)))
452
453 (define (snapshot-url snapshot branch-count first-branch)
454 "Return the URL of SNAPSHOT such that it contains information for
455 BRANCH-COUNT branches, starting at FIRST-BRANCH."
456 (string-append (swh-url "/api/1/snapshot" (snapshot-id snapshot))
457 "?branches_count=" (number->string branch-count)
458 "&branches_from=" (uri-encode first-branch)))
459
460 (define (lookup-snapshot-branch snapshot name)
461 "Look up branch NAME on SNAPSHOT. Return the branch, or return #f if it
462 could not be found."
463 (or (find (lambda (branch)
464 (string=? (branch-name branch) name))
465 (snapshot-branches snapshot))
466
467 ;; There's no API entry point to look up a snapshot branch by name.
468 ;; Work around that by using the paginated list of branches provided by
469 ;; the /api/1/snapshot API: ask for one branch, and start pagination at
470 ;; NAME.
471 (let ((snapshot (call (snapshot-url snapshot 1 name)
472 json->snapshot)))
473 (match (snapshot-branches snapshot)
474 ((branch)
475 (and (string=? (branch-name branch) name)
476 branch))
477 (_ #f)))))
478
479 (define (branch-target branch)
480 "Return the target of BRANCH, either a <revision> or a <release>."
481 (match (branch-target-type branch)
482 ('release
483 (call (swh-url (branch-target-url branch))
484 json->release))
485 ('revision
486 (call (swh-url (branch-target-url branch))
487 json->revision))))
488
489 (define (lookup-origin-revision url tag)
490 "Return a <revision> corresponding to the given TAG for the repository
491 coming from URL. Example:
492
493 (lookup-origin-revision \"https://github.com/guix-mirror/guix/\" \"v0.8\")
494 => #<<revision> id: \"44941…\" …>
495
496 The information is based on the latest visit of URL available. Return #f if
497 URL could not be found."
498 (match (lookup-origin url)
499 (#f #f)
500 (origin
501 (match (filter (lambda (visit)
502 ;; Return #f if (visit-snapshot VISIT) would return #f.
503 (and (visit-snapshot-url visit)
504 (eq? 'full (visit-status visit))))
505 (origin-visits origin))
506 ((visit . _)
507 (let ((snapshot (visit-snapshot visit)))
508 (match (and=> (find (lambda (branch)
509 (or
510 ;; Git specific.
511 (string=? (string-append "refs/tags/" tag)
512 (branch-name branch))
513 ;; Hg specific.
514 (string=? tag
515 (branch-name branch))))
516 (snapshot-branches snapshot))
517 branch-target)
518 ((? release? release)
519 (release-target release))
520 ((? revision? revision)
521 revision)
522 (#f ;tag not found
523 #f))))
524 (()
525 #f)))))
526
527 (define (release-target release)
528 "Return the revision that is the target of RELEASE."
529 (match (release-target-type release)
530 ('revision
531 (call (swh-url (release-target-url release))
532 json->revision))))
533
534 (define (directory-entry-target entry)
535 "If ENTRY, a directory entry, has type 'directory, return its list of
536 directory entries; if it has type 'file, return its <content> object."
537 (call (swh-url (directory-entry-target-url entry))
538 (match (directory-entry-type entry)
539 ('file json->content)
540 ('directory json->directory-entries))))
541
542 (define* (save-origin url #:optional (type "git"))
543 "Request URL to be saved."
544 (call (swh-url "/api/1/origin/save" type "url" url) json->save-reply
545 http-post*))
546
547 (define-query (save-origin-status url type)
548 "Return the status of a /save request for URL and TYPE (e.g., \"git\")."
549 (path "/api/1/origin/save" type "url" url)
550 json->save-reply)
551
552 (define* (vault-url id kind #:optional (archive-type 'flat))
553 "Return the vault query/cooking URL for ID and KIND. Normally, ID is an
554 SWHID and KIND is #f; the deprecated convention is to set ID to a raw
555 directory or revision ID and KIND to 'revision or 'directory."
556 ;; Note: /api/1/vault/directory/ID was deprecated in favor of
557 ;; /api/1/vault/flat/SWHID; this procedure "converts" automatically.
558 (let ((id (match kind
559 ('directory (string-append "swh:1:dir:" id))
560 ('revision (string-append "swh:1:rev:" id))
561 (#f id))))
562 (swh-url "/api/1/vault" (symbol->string archive-type) id)))
563
564 (define* (query-vault id #:optional kind #:key (archive-type 'flat))
565 "Ask the availability of object ID (an SWHID) to the vault. Return #f if it
566 could not be found, or a <vault-reply> on success. ARCHIVE-TYPE can be 'flat
567 for a tarball containing a directory, or 'git-bare for a tarball containing a
568 bare Git repository corresponding to a revision.
569
570 Passing KIND (one of 'directory or 'revision) together with a raw revision or
571 directory identifier is deprecated."
572 (call (vault-url id kind archive-type)
573 json->vault-reply))
574
575 (define* (request-cooking id #:optional kind #:key (archive-type 'flat))
576 "Request the cooking of object ID, an SWHID. Return a <vault-reply>.
577 ARCHIVE-TYPE can be 'flat for a tarball containing a directory, or 'git-bare
578 for a tarball containing a bare Git repository corresponding to a revision.
579
580 Passing KIND (one of 'directory or 'revision) together with a raw revision or
581 directory identifier is deprecated."
582 (call (vault-url id kind archive-type)
583 json->vault-reply
584 http-post*))
585
586 (define* (vault-fetch id
587 #:optional kind
588 #:key
589 (archive-type 'flat)
590 (log-port (current-error-port)))
591 "Return an input port from which a bundle of the object with the given ID,
592 an SWHID, or #f if the object could not be found.
593
594 ARCHIVE-TYPE can be 'flat for a tarball containing a directory, or 'git-bare
595 for a tarball containing a bare Git repository corresponding to a revision."
596 (let loop ((reply (query-vault id kind
597 #:archive-type archive-type)))
598 (match reply
599 (#f
600 (and=> (request-cooking id kind
601 #:archive-type archive-type)
602 loop))
603 (_
604 (match (vault-reply-status reply)
605 ('done
606 ;; Fetch the bundle.
607 (let-values (((response port)
608 (http-get* (swh-url (vault-reply-fetch-url reply))
609 #:streaming? #t
610 #:verify-certificate?
611 (%verify-swh-certificate?))))
612 (if (= (response-code response) 200)
613 port
614 (begin ;shouldn't happen
615 (close-port port)
616 #f))))
617 ('failed
618 ;; Upon failure, we're supposed to try again.
619 (format log-port "SWH vault: failure: ~a~%"
620 (vault-reply-progress-message reply))
621 (format log-port "SWH vault: retrying...~%")
622 (loop (request-cooking id kind
623 #:archive-type archive-type)))
624 ((and (or 'new 'pending) status)
625 ;; Wait until the bundle shows up.
626 (let ((message (vault-reply-progress-message reply)))
627 (when (eq? 'new status)
628 (format log-port "SWH vault: \
629 requested bundle cooking, waiting for completion...~%"))
630 (when (string? message)
631 (format log-port "SWH vault: ~a~%" message))
632
633 ;; Wait long enough so we don't exhaust our maximum number of
634 ;; requests per hour too fast (as of this writing, the limit is 60
635 ;; requests per hour per IP address.)
636 (sleep (if (eq? status 'new) 60 30))
637
638 (loop (query-vault id kind
639 #:archive-type archive-type)))))))))
640
641 \f
642 ;;;
643 ;;; High-level interface.
644 ;;;
645
646 (define (call-with-temporary-directory proc) ;FIXME: factorize
647 "Call PROC with a name of a temporary directory; close the directory and
648 delete it when leaving the dynamic extent of this call."
649 (let* ((directory (or (getenv "TMPDIR") "/tmp"))
650 (template (string-append directory "/guix-directory.XXXXXX"))
651 (tmp-dir (mkdtemp! template)))
652 (dynamic-wind
653 (const #t)
654 (lambda ()
655 (proc tmp-dir))
656 (lambda ()
657 (false-if-exception (delete-file-recursively tmp-dir))))))
658
659 (define* (swh-download-archive swhid output
660 #:key
661 (archive-type 'flat)
662 (log-port (current-error-port)))
663 "Download from Software Heritage the directory or revision with the given
664 SWID, in the ARCHIVE-TYPE format (one of 'flat or 'git-bare), and unpack it to
665 OUTPUT. Return #t on success and #f on failure."
666 (call-with-temporary-directory
667 (lambda (directory)
668 (match (vault-fetch swhid
669 #:archive-type archive-type
670 #:log-port log-port)
671 (#f
672 (format log-port
673 "SWH: object ~a could not be fetched from the vault~%"
674 swhid)
675 #f)
676 ((? port? input)
677 (let ((tar (open-pipe* OPEN_WRITE "tar" "-C" directory
678 (match archive-type
679 ('flat "-xzvf") ;gzipped
680 ('git-bare "-xvf")) ;uncompressed
681 "-")))
682 (dump-port input tar)
683 (close-port input)
684 (let ((status (close-pipe tar)))
685 (unless (zero? status)
686 (error "tar extraction failure" status)))
687
688 (match (scandir directory)
689 (("." ".." sub-directory)
690 (copy-recursively (string-append directory "/" sub-directory)
691 output
692 #:log (%make-void-port "w"))
693 #t))))))))
694
695 (define* (swh-download-directory id output
696 #:key (log-port (current-error-port)))
697 "Download from Software Heritage the directory with the given ID, and
698 unpack it to OUTPUT. Return #t on success and #f on failure."
699 (swh-download-archive (string-append "swh:1:dir:" id) output
700 #:archive-type 'flat
701 #:log-port log-port))
702
703 (define (commit-id? reference)
704 "Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if
705 it is a tag name. This is based on a simple heuristic so use with care!"
706 (and (= (string-length reference) 40)
707 (string-every char-set:hex-digit reference)))
708
709 (define* (swh-download url reference output
710 #:key
711 (archive-type 'flat)
712 (log-port (current-error-port)))
713 "Download from Software Heritage a checkout (if ARCHIVE-TYPE is 'flat) or a
714 full Git repository (if ARCHIVE-TYPE is 'git-bare) of the Git tag or commit
715 REFERENCE originating from URL, and unpack it in OUTPUT. Return #t on success
716 and #f on failure.
717
718 This procedure uses the \"vault\", which contains \"cooked\" directories in
719 the form of tarballs. If the requested directory is not cooked yet, it will
720 wait until it becomes available, which could take several minutes."
721 (match (if (commit-id? reference)
722 (lookup-revision reference)
723 (lookup-origin-revision url reference))
724 ((? revision? revision)
725 (format log-port "SWH: found revision ~a with directory at '~a'~%"
726 (revision-id revision)
727 (swh-url (revision-directory-url revision)))
728 (swh-download-archive (match archive-type
729 ('flat
730 (string-append
731 "swh:1:dir:" (revision-directory revision)))
732 ('git-bare
733 (string-append
734 "swh:1:rev:" (revision-id revision))))
735 output
736 #:archive-type archive-type
737 #:log-port log-port))
738 (#f
739 (format log-port
740 "SWH: revision ~s originating from ~a could not be found~%"
741 reference url)
742 #f)))