;;; GNU Guix --- Functional package management for GNU
-;;; Copyright © 2018 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
+;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
;;;
;;; This file is part of GNU Guix.
;;;
#:use-module (guix base16)
#:use-module (guix build utils)
#:use-module ((guix build syscalls) #:select (mkdtemp!))
+ #:use-module (web uri)
+ #:use-module (guix json)
#:use-module (web client)
#:use-module (web response)
#:use-module (json)
#:use-module (ice-9 regex)
#:use-module (ice-9 popen)
#:use-module ((ice-9 ftw) #:select (scandir))
- #:export (origin?
- origin-id
+ #:export (%swh-base-url
+ %allow-request?
+
+ request-rate-limit-reached?
+
+ origin?
origin-type
origin-url
origin-visits
request-cooking
vault-fetch
+ commit-id?
+
swh-download))
;;; Commentary:
(define %swh-base-url
;; Presumably we won't need to change it.
- "https://archive.softwareheritage.org")
+ (make-parameter "https://archive.softwareheritage.org"))
(define (swh-url path . rest)
+ ;; URLs returned by the API may be relative or absolute. This has changed
+ ;; without notice before. Handle both cases by detecting whether the path
+ ;; starts with a domain.
+ (define root
+ (if (string-prefix? "/" path)
+ (string-append (%swh-base-url) path)
+ path))
+
(define url
- (string-append %swh-base-url path
- (string-join rest "/" 'prefix)))
+ (string-append root (string-join rest "/" 'prefix)))
;; Ensure there's a trailing slash or we get a redirect.
(if (string-suffix? "/" url)
url
(string-append url "/")))
-(define-syntax-rule (define-json-reader json->record ctor spec ...)
- "Define JSON->RECORD as a procedure that converts a JSON representation,
-read from a port, string, or hash table, into a record created by CTOR and
-following SPEC, a series of field specifications."
- (define (json->record input)
- (let ((table (cond ((port? input)
- (json->scm input))
- ((string? input)
- (json-string->scm input))
- ((hash-table? input)
- input))))
- (let-syntax ((extract-field (syntax-rules ()
- ((_ table (field key json->value))
- (json->value (hash-ref table key)))
- ((_ table (field key))
- (hash-ref table key))
- ((_ table (field))
- (hash-ref table
- (symbol->string 'field))))))
- (ctor (extract-field table spec) ...)))))
-
-(define-syntax-rule (define-json-mapping rtd ctor pred json->record
- (field getter spec ...) ...)
- "Define RTD as a record type with the given FIELDs and GETTERs, à la SRFI-9,
-and define JSON->RECORD as a conversion from JSON to a record of this type."
- (begin
- (define-record-type rtd
- (ctor field ...)
- pred
- (field getter) ...)
-
- (define-json-reader json->record ctor
- (field spec ...) ...)))
-
(define %date-regexp
;; Match strings like "2014-11-17T22:09:38+01:00" or
;; "2018-09-30T23:20:07.815449+00:00"".
(ref 10))))))
str)) ;oops!
+(define string*
+ ;; Converts "string or #nil" coming from JSON to "string or #f".
+ (match-lambda
+ ((? string? str) str)
+ ((? null?) #f) ;Guile-JSON 3.x
+ ('null #f))) ;Guile-JSON 4.x
+
+(define %allow-request?
+ ;; Takes a URL and method (e.g., the 'http-get' procedure) and returns true
+ ;; to keep going. This can be used to disallow a requests when
+ ;; 'request-rate-limit-reached?' returns true, for instance.
+ (make-parameter (const #t)))
+
+;; The time when the rate limit for "/origin/save" POST requests and that of
+;; other requests will be reset.
+;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
+(define %save-rate-limit-reset-time 0)
+(define %general-rate-limit-reset-time 0)
+
+(define (request-rate-limit-reached? url method)
+ "Return true if the rate limit has been reached for URI."
+ (define uri
+ (string->uri url))
+
+ (define reset-time
+ (if (and (eq? method http-post)
+ (string-prefix? "/api/1/origin/save/" (uri-path uri)))
+ %save-rate-limit-reset-time
+ %general-rate-limit-reset-time))
+
+ (< (car (gettimeofday)) reset-time))
+
+(define (update-rate-limit-reset-time! url method response)
+ "Update the rate limit reset time for URL and METHOD based on the headers in
+RESPONSE."
+ (let ((uri (string->uri url)))
+ (match (assq-ref (response-headers response) 'x-ratelimit-reset)
+ ((= string->number (? number? reset))
+ (if (and (eq? method http-post)
+ (string-prefix? "/api/1/origin/save/" (uri-path uri)))
+ (set! %save-rate-limit-reset-time reset)
+ (set! %general-rate-limit-reset-time reset)))
+ (_
+ #f))))
+
(define* (call url decode #:optional (method http-get)
#:key (false-if-404? #t))
"Invoke the endpoint at URL using METHOD. Decode the resulting JSON body
using DECODE, a one-argument procedure that takes an input port. When
FALSE-IF-404? is true, return #f upon 404 responses."
- (let*-values (((response port)
- (method url #:streaming? #t)))
- ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
- (match (assq-ref (response-headers response) 'x-ratelimit-remaining)
- (#f #t)
- ((? (compose zero? string->number))
- (throw 'swh-error url response))
- (_ #t))
-
- (cond ((= 200 (response-code response))
- (let ((result (decode port)))
- (close-port port)
- result))
- ((and false-if-404?
- (= 404 (response-code response)))
- (close-port port)
- #f)
- (else
- (close-port port)
- (throw 'swh-error url response)))))
+ (and ((%allow-request?) url method)
+ (let*-values (((response port)
+ (method url #:streaming? #t)))
+ ;; See <https://archive.softwareheritage.org/api/#rate-limiting>.
+ (match (assq-ref (response-headers response) 'x-ratelimit-remaining)
+ (#f #t)
+ ((? (compose zero? string->number))
+ (update-rate-limit-reset-time! url method response)
+ (throw 'swh-error url method response))
+ (_ #t))
+
+ (cond ((= 200 (response-code response))
+ (let ((result (decode port)))
+ (close-port port)
+ result))
+ ((and false-if-404?
+ (= 404 (response-code response)))
+ (close-port port)
+ #f)
+ (else
+ (close-port port)
+ (throw 'swh-error url method response))))))
(define-syntax define-query
(syntax-rules (path)
docstring
(call (swh-url components ...) json->value)))))
-;; <https://archive.softwareheritage.org/api/1/origin/git/url/https://github.com/guix-mirror/guix/>
+;; <https://archive.softwareheritage.org/api/1/origin/https://github.com/guix-mirror/guix/get>
(define-json-mapping <origin> make-origin origin?
json->origin
- (id origin-id)
(visits-url origin-visits-url "origin_visits_url")
(type origin-type)
(url origin-url))
(date visit-date "date" string->date*)
(origin visit-origin)
(url visit-url "origin_visit_url")
- (snapshot-url visit-snapshot-url "snapshot_url")
- (status visit-status)
+ (snapshot-url visit-snapshot-url "snapshot_url" string*) ;string | #f
+ (status visit-status "status" string->symbol) ;'full | 'partial | 'ongoing
(number visit-number "visit"))
;; <https://archive.softwareheritage.org/api/1/snapshot/4334c3ed4bb208604ed780d8687fe523837f1bd1/>
(target-url branch-target-url))
(define (json->branches branches)
- (hash-map->list (lambda (key value)
- (make-branch key
- (string->symbol
- (hash-ref value "target_type"))
- (hash-ref value "target_url")))
- branches))
+ (map (match-lambda
+ ((key . value)
+ (make-branch key
+ (string->symbol
+ (assoc-ref value "target_type"))
+ (assoc-ref value "target_url"))))
+ branches))
;; <https://archive.softwareheritage.org/api/1/release/1f44934fb6e2cefccbecd4fa347025349fa9ff76/>
(define-json-mapping <release> make-release release?
(license-url content-license-url "license_url"))
(define (json->checksums checksums)
- (hash-map->list (lambda (key value)
- (cons key (base16-string->bytevector value)))
- checksums))
+ (map (match-lambda
+ ((key . value)
+ (cons key (base16-string->bytevector value))))
+ checksums))
;; <https://archive.softwareheritage.org/api/1/directory/27c69c5d298a43096a53affbf881e7b13f17bdcd/>
(define-json-mapping <directory-entry> make-directory-entry directory-entry?
(define-query (lookup-origin url)
"Return an origin for URL."
- (path "/api/1/origin/git/url" url)
+ (path "/api/1/origin" url "get")
json->origin)
(define-query (lookup-content hash type)
json->directory-entries)
(define (json->directory-entries port)
- (map json->directory-entry (json->scm port)))
+ (map json->directory-entry
+ (vector->list (json->scm port))))
(define (origin-visits origin)
"Return the list of visits of ORIGIN, a record as returned by
'lookup-origin'."
(call (swh-url (origin-visits-url origin))
(lambda (port)
- (map json->visit (json->scm port)))))
+ (map json->visit (vector->list (json->scm port))))))
(define (visit-snapshot visit)
- "Return the snapshot corresponding to VISIT."
- (call (swh-url (visit-snapshot-url visit))
- json->snapshot))
+ "Return the snapshot corresponding to VISIT or #f if no snapshot is
+available."
+ (and (visit-snapshot-url visit)
+ (call (swh-url (visit-snapshot-url visit))
+ json->snapshot)))
(define (branch-target branch)
"Return the target of BRANCH, either a <revision> or a <release>."
"Return a <revision> corresponding to the given TAG for the repository
coming from URL. Example:
- (lookup-origin-release \"https://github.com/guix-mirror/guix/\" \"v0.8\")
+ (lookup-origin-revision \"https://github.com/guix-mirror/guix/\" \"v0.8\")
=> #<<revision> id: \"44941…\" …>
The information is based on the latest visit of URL available. Return #f if
(match (lookup-origin url)
(#f #f)
(origin
- (match (origin-visits origin)
+ (match (filter visit-snapshot-url (origin-visits origin))
((visit . _)
(let ((snapshot (visit-snapshot visit)))
(match (and=> (find (lambda (branch)
(define (commit-id? reference)
"Return true if REFERENCE is likely a commit ID, false otherwise---e.g., if
-it is a tag name."
+it is a tag name. This is based on a simple heuristic so use with care!"
(and (= (string-length reference) 40)
(string-every char-set:hex-digit reference)))
(lambda ()
(false-if-exception (delete-file-recursively tmp-dir))))))
-(define (swh-download url reference output)
+(define* (swh-download url reference output
+ #:key (log-port (current-error-port)))
"Download from Software Heritage a checkout of the Git tag or commit
REFERENCE originating from URL, and unpack it in OUTPUT. Return #t on success
and #f on failure.
(lookup-revision reference)
(lookup-origin-revision url reference))
((? revision? revision)
+ (format log-port "SWH: found revision ~a with directory at '~a'~%"
+ (revision-id revision)
+ (swh-url (revision-directory-url revision)))
(call-with-temporary-directory
(lambda (directory)
- (let ((input (vault-fetch (revision-directory revision) 'directory))
- (tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-")))
- (dump-port input tar)
- (close-port input)
- (let ((status (close-pipe tar)))
- (unless (zero? status)
- (error "tar extraction failure" status)))
-
- (match (scandir directory)
- (("." ".." sub-directory)
- (copy-recursively (string-append directory "/" sub-directory)
- output
- #:log (%make-void-port "w"))
- #t))))))
+ (match (vault-fetch (revision-directory revision) 'directory
+ #:log-port log-port)
+ (#f
+ (format log-port
+ "SWH: directory ~a could not be fetched from the vault~%"
+ (revision-directory revision))
+ #f)
+ ((? port? input)
+ (let ((tar (open-pipe* OPEN_WRITE "tar" "-C" directory "-xzvf" "-")))
+ (dump-port input tar)
+ (close-port input)
+ (let ((status (close-pipe tar)))
+ (unless (zero? status)
+ (error "tar extraction failure" status)))
+
+ (match (scandir directory)
+ (("." ".." sub-directory)
+ (copy-recursively (string-append directory "/" sub-directory)
+ output
+ #:log (%make-void-port "w"))
+ #t))))))))
(#f
#f)))