Commit | Line | Data |
---|---|---|
de2bfe90 | 1 | ;;; GNU Guix --- Functional package management for GNU |
3d2f2938 | 2 | ;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org> |
6a3911b8 | 3 | ;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net> |
de2bfe90 LC |
4 | ;;; |
5 | ;;; This file is part of GNU Guix. | |
6 | ;;; | |
7 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
8 | ;;; under the terms of the GNU General Public License as published by | |
9 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
10 | ;;; your option) any later version. | |
11 | ;;; | |
12 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
13 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
14 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
15 | ;;; GNU General Public License for more details. | |
16 | ;;; | |
17 | ;;; You should have received a copy of the GNU General Public License | |
18 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
19 | ||
20 | (define-module (guix swh) | |
21 | #:use-module (guix base16) | |
22 | #:use-module (guix build utils) | |
23 | #:use-module ((guix build syscalls) #:select (mkdtemp!)) | |
ba1c1853 | 24 | #:use-module (web uri) |
76073d29 | 25 | #:use-module (guix json) |
de2bfe90 LC |
26 | #:use-module (web client) |
27 | #:use-module (web response) | |
28 | #:use-module (json) | |
29 | #:use-module (srfi srfi-1) | |
30 | #:use-module (srfi srfi-9) | |
31 | #:use-module (srfi srfi-11) | |
32 | #:use-module (srfi srfi-19) | |
33 | #:use-module (ice-9 match) | |
34 | #:use-module (ice-9 regex) | |
35 | #:use-module (ice-9 popen) | |
36 | #:use-module ((ice-9 ftw) #:select (scandir)) | |
96f1cbef | 37 | #:export (%swh-base-url |
722ad41c | 38 | %verify-swh-certificate? |
ba1c1853 LC |
39 | %allow-request? |
40 | ||
41 | request-rate-limit-reached? | |
96f1cbef LC |
42 | |
43 | origin? | |
de2bfe90 LC |
44 | origin-type |
45 | origin-url | |
46 | origin-visits | |
47 | lookup-origin | |
48 | ||
49 | visit? | |
50 | visit-date | |
51 | visit-origin | |
52 | visit-url | |
53 | visit-snapshot-url | |
54 | visit-status | |
55 | visit-number | |
56 | visit-snapshot | |
57 | ||
58 | branch? | |
59 | branch-name | |
60 | branch-target | |
61 | ||
62 | release? | |
63 | release-id | |
64 | release-name | |
65 | release-message | |
66 | release-target | |
67 | ||
68 | revision? | |
69 | revision-id | |
70 | revision-date | |
71 | revision-directory | |
72 | lookup-revision | |
73 | lookup-origin-revision | |
74 | ||
75 | content? | |
76 | content-checksums | |
77 | content-data-url | |
78 | content-length | |
79 | lookup-content | |
80 | ||
81 | directory-entry? | |
82 | directory-entry-name | |
83 | directory-entry-type | |
84 | directory-entry-checksums | |
85 | directory-entry-length | |
86 | directory-entry-permissions | |
87 | lookup-directory | |
88 | directory-entry-target | |
89 | ||
ee6b3bb6 LC |
90 | save-reply? |
91 | save-reply-origin-url | |
92 | save-reply-origin-type | |
93 | save-reply-request-date | |
94 | save-reply-request-status | |
95 | save-reply-task-status | |
96 | save-origin | |
97 | save-origin-status | |
98 | ||
de2bfe90 LC |
99 | vault-reply? |
100 | vault-reply-id | |
101 | vault-reply-fetch-url | |
102 | vault-reply-object-id | |
103 | vault-reply-object-type | |
104 | vault-reply-progress-message | |
105 | vault-reply-status | |
106 | query-vault | |
107 | request-cooking | |
108 | vault-fetch | |
109 | ||
d370cc73 LC |
110 | commit-id? |
111 | ||
de2bfe90 LC |
112 | swh-download)) |
113 | ||
114 | ;;; Commentary: | |
115 | ;;; | |
116 | ;;; This module provides bindings to the HTTP interface of Software Heritage. | |
117 | ;;; It allows you to browse the archive, look up revisions (such as SHA1 | |
118 | ;;; commit IDs), "origins" (code hosting URLs), content (files), etc. See | |
119 | ;;; <https://archive.softwareheritage.org/api/> for more information. | |
120 | ;;; | |
121 | ;;; The high-level 'swh-download' procedure allows you to download a Git | |
122 | ;;; revision from Software Heritage, provided it is available. | |
123 | ;;; | |
124 | ;;; Code: | |
125 | ||
126 | (define %swh-base-url | |
127 | ;; Presumably we won't need to change it. | |
96f1cbef | 128 | (make-parameter "https://archive.softwareheritage.org")) |
de2bfe90 | 129 | |
722ad41c LC |
130 | (define %verify-swh-certificate? |
131 | ;; Whether to verify the X.509 HTTPS certificate for %SWH-BASE-URL. | |
132 | (make-parameter #t)) | |
133 | ||
de2bfe90 | 134 | (define (swh-url path . rest) |
6a3911b8 JK |
135 | ;; URLs returned by the API may be relative or absolute. This has changed |
136 | ;; without notice before. Handle both cases by detecting whether the path | |
137 | ;; starts with a domain. | |
138 | (define root | |
139 | (if (string-prefix? "/" path) | |
140 | (string-append (%swh-base-url) path) | |
141 | path)) | |
142 | ||
de2bfe90 | 143 | (define url |
6a3911b8 | 144 | (string-append root (string-join rest "/" 'prefix))) |
de2bfe90 LC |
145 | |
146 | ;; Ensure there's a trailing slash or we get a redirect. | |
147 | (if (string-suffix? "/" url) | |
148 | url | |
149 | (string-append url "/"))) | |
150 | ||
722ad41c LC |
151 | ;; XXX: Work around a bug in Guile 3.0.2 where #:verify-certificate? would |
152 | ;; be ignored (<https://bugs.gnu.org/40486>). | |
153 | (define* (http-get* uri #:rest rest) | |
154 | (apply http-request uri #:method 'GET rest)) | |
155 | (define* (http-post* uri #:rest rest) | |
156 | (apply http-request uri #:method 'POST rest)) | |
157 | ||
de2bfe90 LC |
158 | (define %date-regexp |
159 | ;; Match strings like "2014-11-17T22:09:38+01:00" or | |
160 | ;; "2018-09-30T23:20:07.815449+00:00"". | |
161 | (make-regexp "^([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})((\\.[0-9]+)?)([+-][0-9]{2}):([0-9]{2})$")) | |
162 | ||
163 | (define (string->date* str) | |
164 | "Return a SRFI-19 date parsed from STR, a date string as returned by | |
165 | Software Heritage." | |
166 | ;; We can't use 'string->date' because of the timezone format: SWH returns | |
167 | ;; "+01:00" when the '~z' template expects "+0100". So we roll our own! | |
168 | (or (and=> (regexp-exec %date-regexp str) | |
169 | (lambda (match) | |
170 | (define (ref n) | |
171 | (string->number (match:substring match n))) | |
172 | ||
173 | (make-date (let ((ns (match:substring match 8))) | |
174 | (if ns | |
175 | (string->number (string-drop ns 1)) | |
176 | 0)) | |
177 | (ref 6) (ref 5) (ref 4) | |
178 | (ref 3) (ref 2) (ref 1) | |
179 | (+ (* 3600 (ref 9)) ;time zone | |
180 | (if (< (ref 9) 0) | |
181 | (- (ref 10)) | |
182 | (ref 10)))))) | |
183 | str)) ;oops! | |
184 | ||
8146c486 LC |
185 | (define string* |
186 | ;; Converts "string or #nil" coming from JSON to "string or #f". | |
187 | (match-lambda | |
188 | ((? string? str) str) | |
cc6dd298 LC |
189 | ((? null?) #f) ;Guile-JSON 3.x |
190 | ('null #f))) ;Guile-JSON 4.x | |
8146c486 | 191 | |
ba1c1853 LC |
192 | (define %allow-request? |
193 | ;; Takes a URL and method (e.g., the 'http-get' procedure) and returns true | |
722ad41c | 194 | ;; to keep going. This can be used to disallow requests when |
ba1c1853 LC |
195 | ;; 'request-rate-limit-reached?' returns true, for instance. |
196 | (make-parameter (const #t))) | |
197 | ||
198 | ;; The time when the rate limit for "/origin/save" POST requests and that of | |
199 | ;; other requests will be reset. | |
200 | ;; See <https://archive.softwareheritage.org/api/#rate-limiting>. | |
201 | (define %save-rate-limit-reset-time 0) | |
202 | (define %general-rate-limit-reset-time 0) | |
203 | ||
204 | (define (request-rate-limit-reached? url method) | |
205 | "Return true if the rate limit has been reached for URI." | |
206 | (define uri | |
207 | (string->uri url)) | |
208 | ||
209 | (define reset-time | |
722ad41c | 210 | (if (and (eq? method http-post*) |
ba1c1853 LC |
211 | (string-prefix? "/api/1/origin/save/" (uri-path uri))) |
212 | %save-rate-limit-reset-time | |
213 | %general-rate-limit-reset-time)) | |
214 | ||
215 | (< (car (gettimeofday)) reset-time)) | |
216 | ||
217 | (define (update-rate-limit-reset-time! url method response) | |
218 | "Update the rate limit reset time for URL and METHOD based on the headers in | |
219 | RESPONSE." | |
220 | (let ((uri (string->uri url))) | |
221 | (match (assq-ref (response-headers response) 'x-ratelimit-reset) | |
222 | ((= string->number (? number? reset)) | |
722ad41c | 223 | (if (and (eq? method http-post*) |
ba1c1853 LC |
224 | (string-prefix? "/api/1/origin/save/" (uri-path uri))) |
225 | (set! %save-rate-limit-reset-time reset) | |
226 | (set! %general-rate-limit-reset-time reset))) | |
227 | (_ | |
228 | #f)))) | |
229 | ||
722ad41c | 230 | (define* (call url decode #:optional (method http-get*) |
de2bfe90 LC |
231 | #:key (false-if-404? #t)) |
232 | "Invoke the endpoint at URL using METHOD. Decode the resulting JSON body | |
233 | using DECODE, a one-argument procedure that takes an input port. When | |
234 | FALSE-IF-404? is true, return #f upon 404 responses." | |
ba1c1853 LC |
235 | (and ((%allow-request?) url method) |
236 | (let*-values (((response port) | |
722ad41c LC |
237 | (method url #:streaming? #t |
238 | #:verify-certificate? | |
239 | (%verify-swh-certificate?)))) | |
ba1c1853 LC |
240 | ;; See <https://archive.softwareheritage.org/api/#rate-limiting>. |
241 | (match (assq-ref (response-headers response) 'x-ratelimit-remaining) | |
242 | (#f #t) | |
243 | ((? (compose zero? string->number)) | |
244 | (update-rate-limit-reset-time! url method response) | |
245 | (throw 'swh-error url method response)) | |
246 | (_ #t)) | |
247 | ||
248 | (cond ((= 200 (response-code response)) | |
249 | (let ((result (decode port))) | |
250 | (close-port port) | |
251 | result)) | |
252 | ((and false-if-404? | |
253 | (= 404 (response-code response))) | |
254 | (close-port port) | |
255 | #f) | |
256 | (else | |
257 | (close-port port) | |
258 | (throw 'swh-error url method response)))))) | |
de2bfe90 LC |
259 | |
260 | (define-syntax define-query | |
261 | (syntax-rules (path) | |
262 | "Define a procedure that performs a Software Heritage query." | |
263 | ((_ (name args ...) docstring (path components ...) | |
264 | json->value) | |
265 | (define (name args ...) | |
266 | docstring | |
267 | (call (swh-url components ...) json->value))))) | |
268 | ||
e507d30c | 269 | ;; <https://archive.softwareheritage.org/api/1/origin/https://github.com/guix-mirror/guix/get> |
de2bfe90 LC |
270 | (define-json-mapping <origin> make-origin origin? |
271 | json->origin | |
de2bfe90 LC |
272 | (visits-url origin-visits-url "origin_visits_url") |
273 | (type origin-type) | |
274 | (url origin-url)) | |
275 | ||
276 | ;; <https://archive.softwareheritage.org/api/1/origin/52181937/visits/> | |
277 | (define-json-mapping <visit> make-visit visit? | |
278 | json->visit | |
279 | (date visit-date "date" string->date*) | |
280 | (origin visit-origin) | |
281 | (url visit-url "origin_visit_url") | |
8146c486 LC |
282 | (snapshot-url visit-snapshot-url "snapshot_url" string*) ;string | #f |
283 | (status visit-status "status" string->symbol) ;'full | 'partial | 'ongoing | |
de2bfe90 LC |
284 | (number visit-number "visit")) |
285 | ||
286 | ;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/> | |
287 | (define-json-mapping <snapshot> make-snapshot snapshot? | |
288 | json->snapshot | |
289 | (branches snapshot-branches "branches" json->branches)) | |
290 | ||
291 | ;; This is used for the "branches" field of snapshots. | |
292 | (define-record-type <branch> | |
293 | (make-branch name target-type target-url) | |
294 | branch? | |
295 | (name branch-name) | |
296 | (target-type branch-target-type) ;release | revision | |
297 | (target-url branch-target-url)) | |
298 | ||
299 | (define (json->branches branches) | |
81c3dc32 LC |
300 | (map (match-lambda |
301 | ((key . value) | |
302 | (make-branch key | |
303 | (string->symbol | |
304 | (assoc-ref value "target_type")) | |
305 | (assoc-ref value "target_url")))) | |
306 | branches)) | |
de2bfe90 LC |
307 | |
308 | ;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/> | |
309 | (define-json-mapping <release> make-release release? | |
310 | json->release | |
311 | (id release-id) | |
312 | (name release-name) | |
313 | (message release-message) | |
314 | (target-type release-target-type "target_type" string->symbol) | |
315 | (target-url release-target-url "target_url")) | |
316 | ||
317 | ;; <https://archive.softwareheritage.org/api/1/revision/359fdda40f754bbf1b5dc261e7427b75463b59be/> | |
318 | (define-json-mapping <revision> make-revision revision? | |
319 | json->revision | |
320 | (id revision-id) | |
321 | (date revision-date "date" string->date*) | |
322 | (directory revision-directory) | |
323 | (directory-url revision-directory-url "directory_url")) | |
324 | ||
325 | ;; <https://archive.softwareheritage.org/api/1/content/> | |
326 | (define-json-mapping <content> make-content content? | |
327 | json->content | |
328 | (checksums content-checksums "checksums" json->checksums) | |
329 | (data-url content-data-url "data_url") | |
330 | (file-type-url content-file-type-url "filetype_url") | |
331 | (language-url content-language-url "language_url") | |
332 | (length content-length) | |
333 | (license-url content-license-url "license_url")) | |
334 | ||
335 | (define (json->checksums checksums) | |
81c3dc32 LC |
336 | (map (match-lambda |
337 | ((key . value) | |
338 | (cons key (base16-string->bytevector value)))) | |
339 | checksums)) | |
de2bfe90 LC |
340 | |
341 | ;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/> | |
342 | (define-json-mapping <directory-entry> make-directory-entry directory-entry? | |
343 | json->directory-entry | |
344 | (name directory-entry-name) | |
345 | (type directory-entry-type "type" | |
346 | (match-lambda | |
347 | ("dir" 'directory) | |
348 | (str (string->symbol str)))) | |
349 | (checksums directory-entry-checksums "checksums" | |
350 | (match-lambda | |
351 | (#f #f) | |
352 | (lst (json->checksums lst)))) | |
353 | (id directory-entry-id "dir_id") | |
354 | (length directory-entry-length) | |
355 | (permissions directory-entry-permissions "perms") | |
356 | (target-url directory-entry-target-url "target_url")) | |
357 | ||
358 | ;; <https://archive.softwareheritage.org/api/1/origin/save/> | |
359 | (define-json-mapping <save-reply> make-save-reply save-reply? | |
360 | json->save-reply | |
361 | (origin-url save-reply-origin-url "origin_url") | |
362 | (origin-type save-reply-origin-type "origin_type") | |
363 | (request-date save-reply-request-date "save_request_date" | |
364 | string->date*) | |
365 | (request-status save-reply-request-status "save_request_status" | |
366 | string->symbol) | |
367 | (task-status save-reply-task-status "save_task_status" | |
368 | (match-lambda | |
369 | ("not created" 'not-created) | |
370 | ((? string? str) (string->symbol str))))) | |
371 | ||
372 | ;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref> | |
373 | (define-json-mapping <vault-reply> make-vault-reply vault-reply? | |
374 | json->vault-reply | |
375 | (id vault-reply-id) | |
376 | (fetch-url vault-reply-fetch-url "fetch_url") | |
377 | (object-id vault-reply-object-id "obj_id") | |
378 | (object-type vault-reply-object-type "obj_type" string->symbol) | |
379 | (progress-message vault-reply-progress-message "progress_message") | |
380 | (status vault-reply-status "status" string->symbol)) | |
381 | ||
382 | \f | |
383 | ;;; | |
384 | ;;; RPCs. | |
385 | ;;; | |
386 | ||
387 | (define-query (lookup-origin url) | |
388 | "Return an origin for URL." | |
356a79be | 389 | (path "/api/1/origin" url "get") |
de2bfe90 LC |
390 | json->origin) |
391 | ||
392 | (define-query (lookup-content hash type) | |
393 | "Return a content for HASH, of the given TYPE--e.g., \"sha256\"." | |
394 | (path "/api/1/content" | |
395 | (string-append type ":" | |
396 | (bytevector->base16-string hash))) | |
397 | json->content) | |
398 | ||
399 | (define-query (lookup-revision id) | |
400 | "Return the revision with the given ID, typically a Git commit SHA1." | |
401 | (path "/api/1/revision" id) | |
402 | json->revision) | |
403 | ||
404 | (define-query (lookup-directory id) | |
405 | "Return the directory with the given ID." | |
406 | (path "/api/1/directory" id) | |
407 | json->directory-entries) | |
408 | ||
409 | (define (json->directory-entries port) | |
81c3dc32 LC |
410 | (map json->directory-entry |
411 | (vector->list (json->scm port)))) | |
de2bfe90 LC |
412 | |
413 | (define (origin-visits origin) | |
414 | "Return the list of visits of ORIGIN, a record as returned by | |
415 | 'lookup-origin'." | |
416 | (call (swh-url (origin-visits-url origin)) | |
417 | (lambda (port) | |
81c3dc32 | 418 | (map json->visit (vector->list (json->scm port)))))) |
de2bfe90 LC |
419 | |
420 | (define (visit-snapshot visit) | |
8146c486 LC |
421 | "Return the snapshot corresponding to VISIT or #f if no snapshot is |
422 | available." | |
423 | (and (visit-snapshot-url visit) | |
424 | (call (swh-url (visit-snapshot-url visit)) | |
425 | json->snapshot))) | |
de2bfe90 LC |
426 | |
427 | (define (branch-target branch) | |
428 | "Return the target of BRANCH, either a <revision> or a <release>." | |
429 | (match (branch-target-type branch) | |
430 | ('release | |
431 | (call (swh-url (branch-target-url branch)) | |
432 | json->release)) | |
433 | ('revision | |
434 | (call (swh-url (branch-target-url branch)) | |
435 | json->revision)))) | |
436 | ||
437 | (define (lookup-origin-revision url tag) | |
438 | "Return a <revision> corresponding to the given TAG for the repository | |
439 | coming from URL. Example: | |
440 | ||
8146c486 | 441 | (lookup-origin-revision \"https://github.com/guix-mirror/guix/\" \"v0.8\") |
de2bfe90 LC |
442 | => #<<revision> id: \"44941…\" …> |
443 | ||
444 | The information is based on the latest visit of URL available. Return #f if | |
445 | URL could not be found." | |
446 | (match (lookup-origin url) | |
447 | (#f #f) | |
448 | (origin | |
8146c486 | 449 | (match (filter visit-snapshot-url (origin-visits origin)) |
de2bfe90 LC |
450 | ((visit . _) |
451 | (let ((snapshot (visit-snapshot visit))) | |
452 | (match (and=> (find (lambda (branch) | |
453 | (string=? (string-append "refs/tags/" tag) | |
454 | (branch-name branch))) | |
455 | (snapshot-branches snapshot)) | |
456 | branch-target) | |
457 | ((? release? release) | |
458 | (release-target release)) | |
459 | ((? revision? revision) | |
460 | revision) | |
461 | (#f ;tag not found | |
462 | #f)))) | |
463 | (() | |
464 | #f))))) | |
465 | ||
466 | (define (release-target release) | |
467 | "Return the revision that is the target of RELEASE." | |
468 | (match (release-target-type release) | |
469 | ('revision | |
470 | (call (swh-url (release-target-url release)) | |
471 | json->revision)))) | |
472 | ||
473 | (define (directory-entry-target entry) | |
474 | "If ENTRY, a directory entry, has type 'directory, return its list of | |
475 | directory entries; if it has type 'file, return its <content> object." | |
476 | (call (swh-url (directory-entry-target-url entry)) | |
477 | (match (directory-entry-type entry) | |
478 | ('file json->content) | |
479 | ('directory json->directory-entries)))) | |
480 | ||
481 | (define* (save-origin url #:optional (type "git")) | |
482 | "Request URL to be saved." | |
483 | (call (swh-url "/api/1/origin/save" type "url" url) json->save-reply | |
722ad41c | 484 | http-post*)) |
de2bfe90 LC |
485 | |
486 | (define-query (save-origin-status url type) | |
487 | "Return the status of a /save request for URL and TYPE (e.g., \"git\")." | |
488 | (path "/api/1/origin/save" type "url" url) | |
489 | json->save-reply) | |
490 | ||
491 | (define-query (query-vault id kind) | |
492 | "Ask the availability of object ID and KIND to the vault, where KIND is | |
493 | 'directory or 'revision. Return #f if it could not be found, or a | |
494 | <vault-reply> on success." | |
495 | ;; <https://docs.softwareheritage.org/devel/swh-vault/api.html#vault-api-ref> | |
496 | ;; There's a single format supported for directories and revisions and for | |
497 | ;; now, the "/format" bit of the URL *must* be omitted. | |
498 | (path "/api/1/vault" (symbol->string kind) id) | |
499 | json->vault-reply) | |
500 | ||
501 | (define (request-cooking id kind) | |
502 | "Request the cooking of object ID and KIND (one of 'directory or 'revision) | |
503 | to the vault. Return a <vault-reply>." | |
504 | (call (swh-url "/api/1/vault" (symbol->string kind) id) | |
505 | json->vault-reply | |
722ad41c | 506 | http-post*)) |
de2bfe90 LC |
507 | |
508 | (define* (vault-fetch id kind | |
509 | #:key (log-port (current-error-port))) | |
510 | "Return an input port from which a bundle of the object with the given ID | |
511 | and KIND (one of 'directory or 'revision) can be retrieved, or #f if the | |
512 | object could not be found. | |
513 | ||
514 | For a directory, the returned stream is a gzip-compressed tarball. For a | |
515 | revision, it is a gzip-compressed stream for 'git fast-import'." | |
516 | (let loop ((reply (query-vault id kind))) | |
517 | (match reply | |
518 | (#f | |
519 | (and=> (request-cooking id kind) loop)) | |
520 | (_ | |
521 | (match (vault-reply-status reply) | |
522 | ('done | |
523 | ;; Fetch the bundle. | |
524 | (let-values (((response port) | |
722ad41c LC |
525 | (http-get* (swh-url (vault-reply-fetch-url reply)) |
526 | #:streaming? #t | |
527 | #:verify-certificate? | |
528 | (%verify-swh-certificate?)))) | |
de2bfe90 LC |
529 | (if (= (response-code response) 200) |
530 | port | |
531 | (begin ;shouldn't happen | |
532 | (close-port port) | |
533 | #f)))) | |
534 | ('failed | |
535 | ;; Upon failure, we're supposed to try again. | |
536 | (format log-port "SWH vault: failure: ~a~%" | |
537 | (vault-reply-progress-message reply)) | |
538 | (format log-port "SWH vault: retrying...~%") | |
539 | (loop (request-cooking id kind))) | |
540 | ((and (or 'new 'pending) status) | |
541 | ;; Wait until the bundle shows up. | |
542 | (let ((message (vault-reply-progress-message reply))) | |
543 | (when (eq? 'new status) | |
544 | (format log-port "SWH vault: \ | |
545 | requested bundle cooking, waiting for completion...~%")) | |
546 | (when (string? message) | |
547 | (format log-port "SWH vault: ~a~%" message)) | |
548 | ||
549 | ;; Wait long enough so we don't exhaust our maximum number of | |
550 | ;; requests per hour too fast (as of this writing, the limit is 60 | |
551 | ;; requests per hour per IP address.) | |
552 | (sleep (if (eq? status 'new) 60 30)) | |
553 | ||
554 | (loop (query-vault id kind))))))))) | |
555 | ||
556 | \f | |
557 | ;;; | |
558 | ;;; High-level interface. | |
559 | ;;; | |
560 | ||
561 | (define (commit-id? reference) | |
562 | "Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if | |
d370cc73 | 563 | it is a tag name. This is based on a simple heuristic so use with care!" |
de2bfe90 LC |
564 | (and (= (string-length reference) 40) |
565 | (string-every char-set:hex-digit reference))) | |
566 | ||
567 | (define (call-with-temporary-directory proc) ;FIXME: factorize | |
568 | "Call PROC with a name of a temporary directory; close the directory and | |
569 | delete it when leaving the dynamic extent of this call." | |
570 | (let* ((directory (or (getenv "TMPDIR") "/tmp")) | |
571 | (template (string-append directory "/guix-directory.XXXXXX")) | |
572 | (tmp-dir (mkdtemp! template))) | |
573 | (dynamic-wind | |
574 | (const #t) | |
575 | (lambda () | |
576 | (proc tmp-dir)) | |
577 | (lambda () | |
578 | (false-if-exception (delete-file-recursively tmp-dir)))))) | |
579 | ||
b8815c5e LC |
580 | (define* (swh-download url reference output |
581 | #:key (log-port (current-error-port))) | |
de2bfe90 LC |
582 | "Download from Software Heritage a checkout of the Git tag or commit |
583 | REFERENCE originating from URL, and unpack it in OUTPUT. Return #t on success | |
584 | and #f on failure. | |
585 | ||
586 | This procedure uses the \"vault\", which contains \"cooked\" directories in | |
587 | the form of tarballs. If the requested directory is not cooked yet, it will | |
588 | wait until it becomes available, which could take several minutes." | |
589 | (match (if (commit-id? reference) | |
590 | (lookup-revision reference) | |
591 | (lookup-origin-revision url reference)) | |
592 | ((? revision? revision) | |
b8815c5e LC |
593 | (format log-port "SWH: found revision ~a with directory at '~a'~%" |
594 | (revision-id revision) | |
595 | (swh-url (revision-directory-url revision))) | |
de2bfe90 LC |
596 | (call-with-temporary-directory |
597 | (lambda (directory) | |
b8815c5e LC |
598 | (match (vault-fetch (revision-directory revision) 'directory |
599 | #:log-port log-port) | |
90c98b5a | 600 | (#f |
b8815c5e LC |
601 | (format log-port |
602 | "SWH: directory ~a could not be fetched from the vault~%" | |
603 | (revision-directory revision)) | |
90c98b5a LC |
604 | #f) |
605 | ((? port? input) | |
606 | (let ((tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-"))) | |
607 | (dump-port input tar) | |
608 | (close-port input) | |
609 | (let ((status (close-pipe tar))) | |
610 | (unless (zero? status) | |
611 | (error "tar extraction failure" status))) | |
612 | ||
613 | (match (scandir directory) | |
614 | (("." ".." sub-directory) | |
615 | (copy-recursively (string-append directory "/" sub-directory) | |
616 | output | |
617 | #:log (%make-void-port "w")) | |
618 | #t)))))))) | |
de2bfe90 LC |
619 | (#f |
620 | #f))) |