1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2015, 2016, 2017, 2018, 2019, 2020 Ricardo Wurmus <rekado@elephly.net>
3 ;;; Copyright © 2015, 2016, 2017, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
4 ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
6 ;;; This file is part of GNU Guix.
8 ;;; GNU Guix is free software; you can redistribute it and/or modify it
9 ;;; under the terms of the GNU General Public License as published by
10 ;;; the Free Software Foundation; either version 3 of the License, or (at
11 ;;; your option) any later version.
13 ;;; GNU Guix is distributed in the hope that it will be useful, but
14 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;;; GNU General Public License for more details.
18 ;;; You should have received a copy of the GNU General Public License
19 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
21 (define-module (guix import cran)
22 #:use-module (ice-9 match)
23 #:use-module (ice-9 regex)
24 #:use-module (ice-9 popen)
25 #:use-module ((ice-9 rdelim) #:select (read-string read-line))
26 #:use-module (srfi srfi-1)
27 #:use-module (srfi srfi-2)
28 #:use-module (srfi srfi-11)
29 #:use-module (srfi srfi-26)
30 #:use-module (srfi srfi-34)
31 #:use-module (ice-9 receive)
32 #:use-module (web uri)
33 #:use-module (guix memoization)
34 #:use-module (guix http-client)
35 #:use-module (gcrypt hash)
36 #:use-module (guix store)
37 #:use-module ((guix serialization) #:select (write-file))
38 #:use-module (guix base32)
39 #:use-module ((guix download) #:select (download-to-store))
40 #:use-module (guix import utils)
41 #:use-module ((guix build utils)
43 delete-file-recursively
44 with-directory-excursion))
45 #:use-module (guix utils)
46 #:use-module (guix git)
47 #:use-module ((guix build-system r) #:select (cran-uri bioconductor-uri))
48 #:use-module (guix ui)
49 #:use-module (guix upstream)
50 #:use-module (guix packages)
51 #:use-module (gnu packages)
52 #:export (cran->guix-package
53 bioconductor->guix-package
61 bioconductor-data-package?
62 bioconductor-experiment-package?
65 description->package))
69 ;;; Generate a package declaration template for the latest version of an R
70 ;;; package on CRAN, using the DESCRIPTION file downloaded from
71 ;;; cran.r-project.org.
75 (define string->license
78 ("Artistic-2.0" 'artistic2.0)
79 ("Apache License 2.0" 'asl2.0)
80 ("BSD_2_clause" 'bsd-2)
81 ("BSD_2_clause + file LICENSE" 'bsd-2)
82 ("BSD_3_clause" 'bsd-3)
83 ("BSD_3_clause + file LICENSE" 'bsd-3)
84 ("GPL" '(list gpl2+ gpl3+))
92 ("LGPL (>= 2)" 'lgpl2.0+)
93 ("LGPL (>= 2.1)" 'lgpl2.1+)
94 ("LGPL (>= 3)" 'lgpl3+)
96 ("MIT + file LICENSE" 'expat)
97 ((x) (string->license x))
98 ((lst ...) `(list ,@(map string->license lst)))
102 (define (description->alist description)
103 "Convert a DESCRIPTION string into an alist."
104 (let ((lines (string-split description #\newline))
105 (parse (lambda (line acc)
106 (if (string-null? line) acc
107 ;; Keys usually start with a capital letter and end with
108 ;; ":". There are some exceptions, unfortunately (such
109 ;; as "biocViews"). There are no blanks in a key.
110 (if (string-match "^[A-Za-z][^ :]+:( |\n|$)" line)
111 ;; New key/value pair
112 (let* ((pos (string-index line #\:))
113 (key (string-take line pos))
114 (value (string-drop line (+ 1 pos))))
116 (string-trim-both value))
118 ;; This is a continuation of the previous pair
119 (match-let ((((key . value) . rest) acc))
120 (cons (cons key (string-join
122 (string-trim-both line))))
124 (fold parse '() lines)))
126 (define (format-inputs names)
127 "Generate a sorted list of package inputs from a list of package NAMES."
129 (list name (list 'unquote (string->symbol name))))
130 (sort names string-ci<?)))
132 (define* (maybe-inputs package-inputs #:optional (type 'inputs))
133 "Given a list of PACKAGE-INPUTS, tries to generate the TYPE field of a
135 (match package-inputs
138 ((package-inputs ...)
139 `((,type (,'quasiquote ,(format-inputs package-inputs)))))))
141 (define %cran-url "https://cran.r-project.org/web/packages/")
142 (define %bioconductor-url "https://bioconductor.org/packages/")
144 ;; The latest Bioconductor release is 3.11. Bioconductor packages should be
146 (define %bioconductor-version "3.11")
148 (define* (bioconductor-packages-list-url #:optional type)
149 (string-append "https://bioconductor.org/packages/"
150 %bioconductor-version
152 ('annotation "/data/annotation")
153 ('experiment "/data/experiment")
155 "/src/contrib/PACKAGES"))
157 (define* (bioconductor-packages-list #:optional type)
158 "Return the latest version of package NAME for the current bioconductor
160 (let ((url (string->uri (bioconductor-packages-list-url type))))
161 (guard (c ((http-get-error? c)
162 (format (current-error-port)
163 "error: failed to retrieve list of packages from ~s: ~a (~s)~%"
164 (uri->string (http-get-error-uri c))
165 (http-get-error-code c)
166 (http-get-error-reason c))
168 ;; Split the big list on empty lines, then turn each chunk into an
169 ;; alist of attributes.
171 (description->alist (string-join chunk "\n")))
172 (let* ((port (http-fetch/cached url))
173 (lines (read-lines port)))
175 (chunk-lines lines))))))
177 (define* (latest-bioconductor-package-version name #:optional type)
178 "Return the version string corresponding to the latest release of the
179 bioconductor package NAME, or #F if the package is unknown."
180 (and=> (find (lambda (meta)
181 (string=? (assoc-ref meta "Package") name))
182 (bioconductor-packages-list type))
183 (cut assoc-ref <> "Version")))
185 ;; XXX taken from (guix scripts hash)
186 (define (vcs-file? file stat)
187 (case (stat:type stat)
189 (member (basename file) '(".bzr" ".git" ".hg" ".svn" "CVS")))
191 ;; Git sub-modules have a '.git' file that is a regular text file.
192 (string=? (basename file) ".git"))
196 ;; Little helper to download URLs only once.
199 (lambda* (url #:key method)
203 (latest-repository-commit store url))
205 (call-with-temporary-directory
207 (unless (zero? (system* "hg" "clone" url dir))
208 (leave (G_ "~A: hg download failed~%") url))
209 (with-directory-excursion dir
210 (let* ((port (open-pipe* OPEN_READ "hg" "id" "--id"))
211 (changeset (string-trim-right (read-string port))))
213 (for-each delete-file-recursively
214 (find-files dir "^\\.hg$" #:directories? #t))
215 (let ((store-directory
216 (add-to-store store (basename url) #t "sha256" dir)))
217 (values store-directory changeset)))))))
218 (else (download-to-store store url)))))))
220 (define (fetch-description repository name)
221 "Return an alist of the contents of the DESCRIPTION file for the R package
222 NAME in the given REPOSITORY, or #f in case of failure. NAME is
226 (let ((url (string-append %cran-url name "/DESCRIPTION")))
227 (guard (c ((http-get-error? c)
228 (format (current-error-port)
229 "error: failed to retrieve package information \
231 (uri->string (http-get-error-uri c))
232 (http-get-error-code c)
233 (http-get-error-reason c))
235 (let* ((port (http-fetch url))
236 (result (description->alist (read-string port))))
240 ;; Currently, the bioconductor project does not offer a way to access a
241 ;; package's DESCRIPTION file over HTTP, so we determine the version,
242 ;; download the source tarball, and then extract the DESCRIPTION file.
244 (and (latest-bioconductor-package-version name) #t)
245 (and (latest-bioconductor-package-version name 'annotation) 'annotation)
246 (and (latest-bioconductor-package-version name 'experiment) 'experiment)))
247 (version (latest-bioconductor-package-version name type))
248 (url (car (bioconductor-uri name version type)))
249 (tarball (download url)))
250 (call-with-temporary-directory
252 (parameterize ((current-error-port (%make-void-port "rw+"))
253 (current-output-port (%make-void-port "rw+")))
254 (and (zero? (system* "tar" "--wildcards" "-x"
255 "--strip-components=1"
257 "-f" tarball "*/DESCRIPTION"))
258 (and=> (description->alist (with-input-from-file
259 (string-append dir "/DESCRIPTION") read-string))
261 (if (boolean? type) meta
262 (cons `(bioconductor-type . ,type) meta))))))))))
264 (and (string-prefix? "http" name)
265 ;; Download the git repository at "NAME"
267 (lambda () (download name #:method 'git))
269 (and=> (description->alist (with-input-from-file
270 (string-append dir "/DESCRIPTION") read-string))
272 (cons* `(git . ,name)
273 `(git-commit . ,commit)
276 (and (string-prefix? "http" name)
277 ;; Download the mercurial repository at "NAME"
279 (lambda () (download name #:method 'hg))
280 (lambda (dir changeset)
281 (and=> (description->alist (with-input-from-file
282 (string-append dir "/DESCRIPTION") read-string))
285 `(hg-changeset . ,changeset)
288 (define (listify meta field)
289 "Look up FIELD in the alist META. If FIELD contains a comma-separated
290 string, turn it into a list and strip off parenthetic expressions. Return the
291 empty list when the FIELD cannot be found."
292 (let ((value (assoc-ref meta field)))
295 ;; Strip off parentheses
296 (let ((items (string-split (regexp-substitute/global
297 #f "( *\\([^\\)]+\\)) *"
300 (remove (lambda (item)
301 (or (string-null? item)
302 ;; When there is whitespace inside of items it is
303 ;; probably because this was not an actual list to
305 (string-any char-set:whitespace item)))
306 (map string-trim-both items))))))
308 ;; Trick Guile 3 so that it keeps the 'listify' binding accessible *and*
309 ;; private even though this module is declarative.
310 (set! listify listify)
312 (define default-r-packages
329 ;; The field for system dependencies is often abused to specify non-package
330 ;; dependencies (such as c++11). This list is used to ignore them.
331 (define invalid-packages
334 (define cran-guix-name (cut guix-name "r-" <>))
336 (define (tarball-needs-fortran? tarball)
337 "Check if the TARBALL contains Fortran source files."
338 (define (check pattern)
339 (parameterize ((current-error-port (%make-void-port "rw+"))
340 (current-output-port (%make-void-port "rw+")))
341 (zero? (system* "tar" "--wildcards" "--list" pattern "-f" tarball))))
346 (define (directory-needs-fortran? dir)
347 "Check if the directory DIR contains Fortran source files."
348 (match (find-files dir "\\.f(90|95)?")
352 (define (needs-fortran? thing tarball?)
353 "Check if the THING contains Fortran source files."
355 (tarball-needs-fortran? thing)
356 (directory-needs-fortran? thing)))
358 (define (files-match-pattern? directory regexp . file-patterns)
359 "Return #T if any of the files matching FILE-PATTERNS in the DIRECTORY match
361 (let ((pattern (make-regexp regexp)))
363 (call-with-input-file file
366 (let ((line (read-line port)))
368 ((eof-object? line) #f)
369 ((regexp-exec pattern line) #t)
371 (apply find-files directory file-patterns))))
373 (define (tarball-files-match-pattern? tarball regexp . file-patterns)
374 "Return #T if any of the files represented by FILE-PATTERNS in the TARBALL
375 match the given REGEXP."
376 (call-with-temporary-directory
378 (parameterize ((current-error-port (%make-void-port "rw+")))
380 "xf" tarball "-C" dir
381 `("--wildcards" ,@file-patterns)))
382 (files-match-pattern? dir regexp))))
384 (define (directory-needs-zlib? dir)
385 "Return #T if any of the Makevars files in the src directory DIR contain a
387 (files-match-pattern? dir "-lz" "(Makevars.*|configure.*)"))
389 (define (tarball-needs-zlib? tarball)
390 "Return #T if any of the Makevars files in the src directory of the TARBALL
391 contain a zlib linker flag."
392 (tarball-files-match-pattern?
394 "*/src/Makevars*" "*/src/configure*" "*/configure*"))
396 (define (needs-zlib? thing tarball?)
397 "Check if the THING contains files indicating a dependency on zlib."
399 (tarball-needs-zlib? thing)
400 (directory-needs-zlib? thing)))
402 (define (directory-needs-pkg-config? dir)
403 "Return #T if any of the Makevars files in the src directory DIR reference
404 the pkg-config tool."
405 (files-match-pattern? dir "pkg-config"
406 "(Makevars.*|configure.*)"))
408 (define (tarball-needs-pkg-config? tarball)
409 "Return #T if any of the Makevars files in the src directory of the TARBALL
410 reference the pkg-config tool."
411 (tarball-files-match-pattern?
413 "*/src/Makevars*" "*/src/configure*" "*/configure*"))
415 (define (needs-pkg-config? thing tarball?)
416 "Check if the THING contains files indicating a dependency on pkg-config."
418 (tarball-needs-pkg-config? thing)
419 (directory-needs-pkg-config? thing)))
421 (define (needs-knitr? meta)
422 (member "knitr" (listify meta "VignetteBuilder")))
424 ;; XXX adapted from (guix scripts hash)
425 (define (file-hash file select? recursive?)
426 ;; Compute the hash of FILE.
428 (let-values (((port get-hash) (open-sha256-port)))
429 (write-file file port #:select? select?)
432 (call-with-input-file file port-sha256)))
434 (define (description->package repository meta)
435 "Return the `package' s-expression for an R package published on REPOSITORY
436 from the alist META, which was derived from the R package's DESCRIPTION file."
437 (let* ((base-url (case repository
439 ((bioconductor) %bioconductor-url)
442 (uri-helper (case repository
444 ((bioconductor) bioconductor-uri)
447 (name (assoc-ref meta "Package"))
448 (synopsis (assoc-ref meta "Title"))
449 (version (assoc-ref meta "Version"))
450 (license (string->license (assoc-ref meta "License")))
451 ;; Some packages have multiple home pages. Some have none.
452 (home-page (case repository
453 ((git) (assoc-ref meta 'git))
454 ((hg) (assoc-ref meta 'hg))
455 (else (match (listify meta "URL")
457 (_ (string-append base-url name))))))
458 (source-url (case repository
459 ((git) (assoc-ref meta 'git))
460 ((hg) (assoc-ref meta 'hg))
462 (match (apply uri-helper name version
465 (list (assoc-ref meta 'bioconductor-type)))
468 ((? string? url) url)
470 (git? (assoc-ref meta 'git))
471 (hg? (assoc-ref meta 'hg))
472 (source (download source-url #:method (cond
477 (if (needs-zlib? source (not (or git? hg?))) '("zlib") '())
478 (filter (lambda (name)
479 (not (member name invalid-packages)))
480 (map string-downcase (listify meta "SystemRequirements")))))
481 (propagate (filter (lambda (name)
482 (not (member name (append default-r-packages
485 (listify meta "Imports")
486 (listify meta "LinkingTo")
488 (listify meta "Depends")))))
491 (name ,(cran-guix-name name))
492 (version ,(case repository
494 `(git-version ,version revision commit))
496 `(string-append ,version "-" revision "." changeset))
503 (uri ,(case repository
506 (url ,(assoc-ref meta 'git))
510 (url ,(assoc-ref meta 'hg))
511 (changeset changeset)))
513 `(,(procedure-name uri-helper) ,name version
514 ,@(or (and=> (assoc-ref meta 'bioconductor-type)
516 (list (list 'quote type))))
520 '((file-name (git-file-name name version))))
522 '((file-name (string-append name "-" version "-checkout"))))
526 ,(bytevector->nix-base32-string
529 (file-hash source (negate vcs-file?) #t))
531 (file-hash source (negate vcs-file?) #t))
532 (else (file-sha256 source))))))))
533 ,@(if (not (and git? hg?
534 (equal? (string-append "r-" name)
535 (cran-guix-name name))))
536 `((properties ,`(,'quasiquote ((,'upstream-name . ,name)))))
538 (build-system r-build-system)
539 ,@(maybe-inputs sysdepends)
540 ,@(maybe-inputs (map cran-guix-name propagate) 'propagated-inputs)
542 `(,@(if (needs-fortran? source (not (or git? hg?)))
544 ,@(if (needs-pkg-config? source (not (or git? hg?)))
546 ,@(if (needs-knitr? meta)
549 (home-page ,(if (string-null? home-page)
550 (string-append base-url name)
553 (description ,(beautify-description (or (assoc-ref meta "Description")
555 (license ,license))))
559 `(let ((commit ,(assoc-ref meta 'git-commit))
563 `(let ((changeset ,(assoc-ref meta 'hg-changeset))
569 (define cran->guix-package
571 (lambda* (package-name #:optional (repo 'cran))
572 "Fetch the metadata for PACKAGE-NAME from REPO and return the `package'
573 s-expression corresponding to that package, or #f on failure."
574 (let ((description (fetch-description repo package-name)))
576 (description->package repo description)
579 ;; Retry import from Bioconductor
580 (cran->guix-package package-name 'bioconductor))
582 ;; Retry import from Bioconductor
583 (cran->guix-package package-name 'bioconductor))
585 ;; Retry import from CRAN
586 (cran->guix-package package-name 'cran))
587 (else (values #f '()))))))))
589 (define* (cran-recursive-import package-name #:optional (repo 'cran))
590 (recursive-import package-name repo
591 #:repo->guix-package cran->guix-package
592 #:guix-name cran-guix-name))
599 (define (package->upstream-name package)
600 "Return the upstream name of the PACKAGE."
601 (let* ((properties (package-properties package))
602 (upstream-name (and=> properties
603 (cut assoc-ref <> 'upstream-name))))
606 (match (package-source package)
608 (match (origin-uri origin)
609 ((or (? string? url) (url _ ...))
610 (let ((end (string-rindex url #\_))
611 (start (string-rindex url #\/)))
613 ;; (string-append "/" name "_" version ".tar.gz")
614 (and start end (substring url (+ start 1) end))))
618 (define (latest-cran-release pkg)
619 "Return an <upstream-source> for the latest release of the package PKG."
621 (define upstream-name
622 (package->upstream-name pkg))
625 (fetch-description 'cran upstream-name))
628 (let ((version (assoc-ref meta "Version")))
629 ;; CRAN does not provide signatures.
631 (package (package-name pkg))
633 (urls (cran-uri upstream-name version))
636 (description->package 'cran meta)))))))
638 (define (latest-bioconductor-release pkg)
639 "Return an <upstream-source> for the latest release of the package PKG."
641 (define upstream-name
642 (package->upstream-name pkg))
645 (latest-bioconductor-package-version upstream-name))
648 ;; Bioconductor does not provide signatures.
650 (package (package-name pkg))
652 (urls (bioconductor-uri upstream-name version))
656 (cran->guix-package upstream-name 'bioconductor))))))
658 (define (cran-package? package)
659 "Return true if PACKAGE is an R package from CRAN."
660 (and (string-prefix? "r-" (package-name package))
661 ;; Check if the upstream name can be extracted from package uri.
662 (package->upstream-name package)
663 ;; Check if package uri(s) are prefixed by "mirror://cran".
664 ((url-predicate (cut string-prefix? "mirror://cran" <>)) package)))
666 (define (bioconductor-package? package)
667 "Return true if PACKAGE is an R package from Bioconductor."
668 (let ((predicate (lambda (uri)
669 (and (string-prefix? "https://bioconductor.org" uri)
670 ;; Data packages are neither listed in SVN nor on
671 ;; the Github mirror, so we have to exclude them
672 ;; from the set of bioconductor packages that can be
673 ;; updated automatically.
674 (not (string-contains uri "/data/annotation/"))
675 ;; Experiment packages are in a separate repository.
676 (not (string-contains uri "/data/experiment/"))))))
677 (and (string-prefix? "r-" (package-name package))
678 ((url-predicate predicate) package))))
680 (define (bioconductor-data-package? package)
681 "Return true if PACKAGE is an R data package from Bioconductor."
682 (let ((predicate (lambda (uri)
683 (and (string-prefix? "https://bioconductor.org" uri)
684 (string-contains uri "/data/annotation/")))))
685 (and (string-prefix? "r-" (package-name package))
686 ((url-predicate predicate) package))))
688 (define (bioconductor-experiment-package? package)
689 "Return true if PACKAGE is an R experiment package from Bioconductor."
690 (let ((predicate (lambda (uri)
691 (and (string-prefix? "https://bioconductor.org" uri)
692 (string-contains uri "/data/experiment/")))))
693 (and (string-prefix? "r-" (package-name package))
694 ((url-predicate predicate) package))))
696 (define %cran-updater
699 (description "Updater for CRAN packages")
701 (latest latest-cran-release)))
703 (define %bioconductor-updater
706 (description "Updater for Bioconductor packages")
707 (pred bioconductor-package?)
708 (latest latest-bioconductor-release)))
710 ;;; cran.scm ends here