gnu: Add r-all.
[jackhill/guix/guix.git] / guix / import / cran.scm
CommitLineData
e1248602 1;;; GNU Guix --- Functional package management for GNU
91e05559 2;;; Copyright © 2015, 2016, 2017, 2018, 2019 Ricardo Wurmus <rekado@elephly.net>
f9704f17 3;;; Copyright © 2015, 2016, 2017 Ludovic Courtès <ludo@gnu.org>
db427602 4;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
e1248602
RW
5;;;
6;;; This file is part of GNU Guix.
7;;;
8;;; GNU Guix is free software; you can redistribute it and/or modify it
9;;; under the terms of the GNU General Public License as published by
10;;; the Free Software Foundation; either version 3 of the License, or (at
11;;; your option) any later version.
12;;;
13;;; GNU Guix is distributed in the hope that it will be useful, but
14;;; WITHOUT ANY WARRANTY; without even the implied warranty of
15;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16;;; GNU General Public License for more details.
17;;;
18;;; You should have received a copy of the GNU General Public License
19;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20
21(define-module (guix import cran)
22 #:use-module (ice-9 match)
23 #:use-module (ice-9 regex)
2dca8b2d 24 #:use-module ((ice-9 rdelim) #:select (read-string read-line))
e1248602 25 #:use-module (srfi srfi-1)
10a1cacb 26 #:use-module (srfi srfi-2)
d882c235 27 #:use-module (srfi srfi-26)
fdbc84b0 28 #:use-module (srfi srfi-34)
ad68f7fa 29 #:use-module (ice-9 receive)
fdbc84b0 30 #:use-module (web uri)
f9704f17 31 #:use-module (guix memoization)
e1248602 32 #:use-module (guix http-client)
ca719424 33 #:use-module (gcrypt hash)
e1248602
RW
34 #:use-module (guix store)
35 #:use-module (guix base32)
36 #:use-module ((guix download) #:select (download-to-store))
37 #:use-module (guix import utils)
2dca8b2d
RW
38 #:use-module ((guix build utils) #:select (find-files))
39 #:use-module (guix utils)
d0bd632f 40 #:use-module ((guix build-system r) #:select (cran-uri bioconductor-uri))
d882c235
LC
41 #:use-module (guix upstream)
42 #:use-module (guix packages)
94e907b9 43 #:use-module (gnu packages)
d882c235 44 #:export (cran->guix-package
d0bd632f 45 bioconductor->guix-package
ae9e5d66 46 cran-recursive-import
d0bd632f 47 %cran-updater
ff8c179a
RW
48 %bioconductor-updater
49
50 cran-package?
51 bioconductor-package?
52 bioconductor-data-package?
53 bioconductor-experiment-package?))
e1248602
RW
54
55;;; Commentary:
56;;;
57;;; Generate a package declaration template for the latest version of an R
0f6b9e98 58;;; package on CRAN, using the DESCRIPTION file downloaded from
e1248602
RW
59;;; cran.r-project.org.
60;;;
61;;; Code:
62
63(define string->license
64 (match-lambda
65 ("AGPL-3" 'agpl3+)
66 ("Artistic-2.0" 'artistic2.0)
67 ("Apache License 2.0" 'asl2.0)
68 ("BSD_2_clause" 'bsd-2)
576eda6d 69 ("BSD_2_clause + file LICENSE" 'bsd-2)
e1248602 70 ("BSD_3_clause" 'bsd-3)
576eda6d 71 ("BSD_3_clause + file LICENSE" 'bsd-3)
94a400be 72 ("GPL" '(list gpl2+ gpl3+))
b6a22275
RW
73 ("GPL (>= 2)" 'gpl2+)
74 ("GPL (>= 3)" 'gpl3+)
13f54d08
RW
75 ("GPL-2" 'gpl2)
76 ("GPL-3" 'gpl3)
77 ("LGPL-2" 'lgpl2.0)
78 ("LGPL-2.1" 'lgpl2.1)
79 ("LGPL-3" 'lgpl3)
b6a22275
RW
80 ("LGPL (>= 2)" 'lgpl2.0+)
81 ("LGPL (>= 3)" 'lgpl3+)
741d68c2
RW
82 ("MIT" 'expat)
83 ("MIT + file LICENSE" 'expat)
e1248602
RW
84 ((x) (string->license x))
85 ((lst ...) `(list ,@(map string->license lst)))
86 (_ #f)))
87
0f6b9e98
RW
88
89(define (description->alist description)
90 "Convert a DESCRIPTION string into an alist."
91 (let ((lines (string-split description #\newline))
92 (parse (lambda (line acc)
93 (if (string-null? line) acc
94 ;; Keys usually start with a capital letter and end with
95 ;; ":". There are some exceptions, unfortunately (such
96 ;; as "biocViews"). There are no blanks in a key.
97 (if (string-match "^[A-Za-z][^ :]+:( |\n|$)" line)
98 ;; New key/value pair
99 (let* ((pos (string-index line #\:))
100 (key (string-take line pos))
101 (value (string-drop line (+ 1 pos))))
102 (cons (cons key
103 (string-trim-both value))
104 acc))
105 ;; This is a continuation of the previous pair
106 (match-let ((((key . value) . rest) acc))
107 (cons (cons key (string-join
108 (list value
109 (string-trim-both line))))
110 rest)))))))
111 (fold parse '() lines)))
112
e1248602
RW
113(define (format-inputs names)
114 "Generate a sorted list of package inputs from a list of package NAMES."
115 (map (lambda (name)
116 (list name (list 'unquote (string->symbol name))))
117 (sort names string-ci<?)))
118
119(define* (maybe-inputs package-inputs #:optional (type 'inputs))
120 "Given a list of PACKAGE-INPUTS, tries to generate the TYPE field of a
121package definition."
122 (match package-inputs
123 (()
124 '())
125 ((package-inputs ...)
126 `((,type (,'quasiquote ,(format-inputs package-inputs)))))))
127
632ea817 128(define %cran-url "https://cran.r-project.org/web/packages/")
5713bbf1 129(define %bioconductor-url "https://bioconductor.org/packages/")
d0bd632f 130
6490cd31 131;; The latest Bioconductor release is 3.9. Bioconductor packages should be
d0bd632f 132;; updated together.
6490cd31 133(define %bioconductor-version "3.9")
84dfdc57
RW
134
135(define %bioconductor-packages-list-url
136 (string-append "https://bioconductor.org/packages/"
137 %bioconductor-version "/bioc/src/contrib/PACKAGES"))
138
139(define (bioconductor-packages-list)
140 "Return the latest version of package NAME for the current bioconductor
141release."
142 (let ((url (string->uri %bioconductor-packages-list-url)))
143 (guard (c ((http-get-error? c)
144 (format (current-error-port)
145 "error: failed to retrieve list of packages from ~s: ~a (~s)~%"
146 (uri->string (http-get-error-uri c))
147 (http-get-error-code c)
148 (http-get-error-reason c))
149 #f))
150 ;; Split the big list on empty lines, then turn each chunk into an
151 ;; alist of attributes.
152 (map (lambda (chunk)
153 (description->alist (string-join chunk "\n")))
154 (chunk-lines (read-lines (http-fetch/cached url)))))))
155
156(define (latest-bioconductor-package-version name)
157 "Return the version string corresponding to the latest release of the
158bioconductor package NAME, or #F if the package is unknown."
159 (and=> (find (lambda (meta)
160 (string=? (assoc-ref meta "Package") name))
161 (bioconductor-packages-list))
162 (cut assoc-ref <> "Version")))
d0bd632f 163
b3d0617a
RW
164;; Little helper to download URLs only once.
165(define download
166 (memoize
167 (lambda (url)
168 (with-store store (download-to-store store url)))))
169
7c9fcb08 170(define (fetch-description repository name)
0f6b9e98 171 "Return an alist of the contents of the DESCRIPTION file for the R package
7c9fcb08
RW
172NAME in the given REPOSITORY, or #f in case of failure. NAME is
173case-sensitive."
27baf509
RW
174 (case repository
175 ((cran)
176 (let ((url (string-append %cran-url name "/DESCRIPTION")))
177 (guard (c ((http-get-error? c)
178 (format (current-error-port)
179 "error: failed to retrieve package information \
fdbc84b0 180from ~s: ~a (~s)~%"
27baf509
RW
181 (uri->string (http-get-error-uri c))
182 (http-get-error-code c)
183 (http-get-error-reason c))
184 #f))
185 (description->alist (read-string (http-fetch url))))))
186 ((bioconductor)
187 ;; Currently, the bioconductor project does not offer a way to access a
188 ;; package's DESCRIPTION file over HTTP, so we determine the version,
189 ;; download the source tarball, and then extract the DESCRIPTION file.
10a1cacb
RW
190 (and-let* ((version (latest-bioconductor-package-version name))
191 (url (car (bioconductor-uri name version)))
b3d0617a 192 (tarball (download url)))
27baf509
RW
193 (call-with-temporary-directory
194 (lambda (dir)
195 (parameterize ((current-error-port (%make-void-port "rw+"))
196 (current-output-port (%make-void-port "rw+")))
197 (and (zero? (system* "tar" "--wildcards" "-x"
198 "--strip-components=1"
199 "-C" dir
200 "-f" tarball "*/DESCRIPTION"))
201 (description->alist (with-input-from-file
202 (string-append dir "/DESCRIPTION") read-string))))))))))
0f6b9e98
RW
203
204(define (listify meta field)
205 "Look up FIELD in the alist META. If FIELD contains a comma-separated
206string, turn it into a list and strip off parenthetic expressions. Return the
207empty list when the FIELD cannot be found."
208 (let ((value (assoc-ref meta field)))
209 (if (not value)
210 '()
211 ;; Strip off parentheses
212 (let ((items (string-split (regexp-substitute/global
213 #f "( *\\([^\\)]+\\)) *"
214 value 'pre 'post)
215 #\,)))
be036757
RW
216 (remove (lambda (item)
217 (or (string-null? item)
218 ;; When there is whitespace inside of items it is
219 ;; probably because this was not an actual list to
220 ;; begin with.
221 (string-any char-set:whitespace item)))
0f6b9e98
RW
222 (map string-trim-both items))))))
223
b26abe4f 224(define default-r-packages
aeb64f3c 225 (list "base"
b26abe4f 226 "compiler"
b26abe4f
RW
227 "grDevices"
228 "graphics"
229 "grid"
b26abe4f 230 "methods"
b26abe4f 231 "parallel"
b26abe4f
RW
232 "splines"
233 "stats"
234 "stats4"
b26abe4f
RW
235 "tcltk"
236 "tools"
237 "translations"
238 "utils"))
239
e96619ba
RW
240;; The field for system dependencies is often abused to specify non-package
241;; dependencies (such as c++11). This list is used to ignore them.
242(define invalid-packages
243 (list "c++11"))
244
ae9e5d66 245(define cran-guix-name (cut guix-name "r-" <>))
bfa0c752 246
2dca8b2d
RW
247(define (needs-fortran? tarball)
248 "Check if the TARBALL contains Fortran source files."
249 (define (check pattern)
250 (parameterize ((current-error-port (%make-void-port "rw+"))
251 (current-output-port (%make-void-port "rw+")))
252 (zero? (system* "tar" "--wildcards" "--list" pattern "-f" tarball))))
253 (or (check "*.f90")
254 (check "*.f95")
255 (check "*.f")))
256
a0f43208
RW
257(define (tarball-files-match-pattern? tarball regexp . file-patterns)
258 "Return #T if any of the files represented by FILE-PATTERNS in the TARBALL
259match the given REGEXP."
2dca8b2d
RW
260 (call-with-temporary-directory
261 (lambda (dir)
a0f43208 262 (let ((pattern (make-regexp regexp)))
2dca8b2d 263 (parameterize ((current-error-port (%make-void-port "rw+")))
a0f43208
RW
264 (apply system* "tar"
265 "xf" tarball "-C" dir
266 `("--wildcards" ,@file-patterns)))
2dca8b2d
RW
267 (any (lambda (file)
268 (call-with-input-file file
269 (lambda (port)
270 (let loop ()
271 (let ((line (read-line port)))
272 (cond
273 ((eof-object? line) #f)
274 ((regexp-exec pattern line) #t)
a0f43208 275 (else (loop))))))))
2dca8b2d
RW
276 (find-files dir))))))
277
a0f43208
RW
278(define (needs-zlib? tarball)
279 "Return #T if any of the Makevars files in the src directory of the TARBALL
280contain a zlib linker flag."
281 (tarball-files-match-pattern?
282 tarball "-lz"
283 "*/src/Makevars*" "*/src/configure*" "*/configure*"))
284
17a69cf6
RW
285(define (needs-pkg-config? tarball)
286 "Return #T if any of the Makevars files in the src directory of the TARBALL
287reference the pkg-config tool."
288 (tarball-files-match-pattern?
289 tarball "pkg-config"
290 "*/src/Makevars*" "*/src/configure*" "*/configure*"))
291
d0bd632f
RW
292(define (description->package repository meta)
293 "Return the `package' s-expression for an R package published on REPOSITORY
294from the alist META, which was derived from the R package's DESCRIPTION file."
d0bd632f
RW
295 (let* ((base-url (case repository
296 ((cran) %cran-url)
297 ((bioconductor) %bioconductor-url)))
298 (uri-helper (case repository
299 ((cran) cran-uri)
300 ((bioconductor) bioconductor-uri)))
301 (name (assoc-ref meta "Package"))
0f6b9e98
RW
302 (synopsis (assoc-ref meta "Title"))
303 (version (assoc-ref meta "Version"))
304 (license (string->license (assoc-ref meta "License")))
305 ;; Some packages have multiple home pages. Some have none.
306 (home-page (match (listify meta "URL")
307 ((url rest ...) url)
d0bd632f
RW
308 (_ (string-append base-url name))))
309 (source-url (match (uri-helper name version)
0f6b9e98 310 ((url rest ...) url)
d0bd632f 311 ((? string? url) url)
0f6b9e98 312 (_ #f)))
b3d0617a 313 (tarball (download source-url))
2dca8b2d
RW
314 (sysdepends (append
315 (if (needs-zlib? tarball) '("zlib") '())
531940d3
RW
316 (filter (lambda (name)
317 (not (member name invalid-packages)))
318 (map string-downcase (listify meta "SystemRequirements")))))
b26abe4f 319 (propagate (filter (lambda (name)
e96619ba
RW
320 (not (member name (append default-r-packages
321 invalid-packages))))
b26abe4f
RW
322 (lset-union equal?
323 (listify meta "Imports")
324 (listify meta "LinkingTo")
325 (delete "R"
326 (listify meta "Depends"))))))
ad68f7fa
RW
327 (values
328 `(package
ae9e5d66 329 (name ,(cran-guix-name name))
ad68f7fa
RW
330 (version ,version)
331 (source (origin
332 (method url-fetch)
333 (uri (,(procedure-name uri-helper) ,name version))
334 (sha256
335 (base32
336 ,(bytevector->nix-base32-string (file-sha256 tarball))))))
337 ,@(if (not (equal? (string-append "r-" name)
ae9e5d66 338 (cran-guix-name name)))
ad68f7fa
RW
339 `((properties ,`(,'quasiquote ((,'upstream-name . ,name)))))
340 '())
341 (build-system r-build-system)
342 ,@(maybe-inputs sysdepends)
ae9e5d66 343 ,@(maybe-inputs (map cran-guix-name propagate) 'propagated-inputs)
17a69cf6
RW
344 ,@(maybe-inputs
345 `(,@(if (needs-fortran? tarball)
346 '("gfortran") '())
347 ,@(if (needs-pkg-config? tarball)
348 '("pkg-config") '()))
349 'native-inputs)
ad68f7fa
RW
350 (home-page ,(if (string-null? home-page)
351 (string-append base-url name)
352 home-page))
353 (synopsis ,synopsis)
354 (description ,(beautify-description (or (assoc-ref meta "Description")
355 "")))
356 (license ,license))
357 propagate)))
e1248602 358
94e907b9
RW
359(define cran->guix-package
360 (memoize
361 (lambda* (package-name #:optional (repo 'cran))
362 "Fetch the metadata for PACKAGE-NAME from REPO and return the `package'
d0bd632f 363s-expression corresponding to that package, or #f on failure."
10a1cacb
RW
364 (let ((description (fetch-description repo package-name)))
365 (if (and (not description)
366 (eq? repo 'bioconductor))
367 ;; Retry import from CRAN
368 (cran->guix-package package-name 'cran)
7bb6420c
RW
369 (and description
370 (description->package repo description)))))))
94e907b9 371
2a13642b 372(define* (cran-recursive-import package-name #:optional (repo 'cran))
ae9e5d66
OP
373 (recursive-import package-name repo
374 #:repo->guix-package cran->guix-package
375 #:guix-name cran-guix-name))
d882c235
LC
376
377\f
378;;;
379;;; Updater.
380;;;
381
d1c11608
RW
382(define (package->upstream-name package)
383 "Return the upstream name of the PACKAGE."
384 (let* ((properties (package-properties package))
385 (upstream-name (and=> properties
386 (cut assoc-ref <> 'upstream-name))))
387 (if upstream-name
388 upstream-name
389 (match (package-source package)
390 ((? origin? origin)
391 (match (origin-uri origin)
b98293eb 392 ((or (? string? url) (url _ ...))
d1c11608
RW
393 (let ((end (string-rindex url #\_))
394 (start (string-rindex url #\/)))
395 ;; The URL ends on
396 ;; (string-append "/" name "_" version ".tar.gz")
db427602 397 (and start end (substring url (+ start 1) end))))
d1c11608
RW
398 (_ #f)))
399 (_ #f)))))
400
91e05559
RW
401(define (latest-cran-release pkg)
402 "Return an <upstream-source> for the latest release of the package PKG."
0f6b9e98 403
d1c11608 404 (define upstream-name
91e05559 405 (package->upstream-name pkg))
0f6b9e98
RW
406
407 (define meta
7c9fcb08 408 (fetch-description 'cran upstream-name))
0f6b9e98
RW
409
410 (and meta
411 (let ((version (assoc-ref meta "Version")))
412 ;; CRAN does not provide signatures.
413 (upstream-source
91e05559 414 (package (package-name pkg))
0f6b9e98 415 (version version)
91e05559
RW
416 (urls (cran-uri upstream-name version))
417 (input-changes
418 (changed-inputs pkg
419 (description->package 'cran meta)))))))
d882c235 420
91e05559
RW
421(define (latest-bioconductor-release pkg)
422 "Return an <upstream-source> for the latest release of the package PKG."
d0bd632f
RW
423
424 (define upstream-name
91e05559 425 (package->upstream-name pkg))
d0bd632f 426
27baf509
RW
427 (define version
428 (latest-bioconductor-package-version upstream-name))
d0bd632f 429
27baf509
RW
430 (and version
431 ;; Bioconductor does not provide signatures.
432 (upstream-source
91e05559 433 (package (package-name pkg))
27baf509 434 (version version)
91e05559
RW
435 (urls (bioconductor-uri upstream-name version))
436 (input-changes
437 (changed-inputs
438 pkg
439 (cran->guix-package upstream-name 'bioconductor))))))
d0bd632f 440
d882c235
LC
441(define (cran-package? package)
442 "Return true if PACKAGE is an R package from CRAN."
d0bd632f 443 (and (string-prefix? "r-" (package-name package))
db427602
MO
444 ;; Check if the upstream name can be extracted from package uri.
445 (package->upstream-name package)
446 ;; Check if package uri(s) are prefixed by "mirror://cran".
d0bd632f
RW
447 (match (and=> (package-source package) origin-uri)
448 ((? string? uri)
449 (string-prefix? "mirror://cran" uri))
450 ((? list? uris)
451 (any (cut string-prefix? "mirror://cran" <>) uris))
452 (_ #f))))
453
454(define (bioconductor-package? package)
455 "Return true if PACKAGE is an R package from Bioconductor."
5ae63362 456 (let ((predicate (lambda (uri)
5713bbf1 457 (and (string-prefix? "https://bioconductor.org" uri)
7c9fcb08
RW
458 ;; Data packages are neither listed in SVN nor on
459 ;; the Github mirror, so we have to exclude them
460 ;; from the set of bioconductor packages that can be
461 ;; updated automatically.
c9ffa91f
RW
462 (not (string-contains uri "/data/annotation/"))
463 ;; Experiment packages are in a separate repository.
464 (not (string-contains uri "/data/experiment/"))))))
5ae63362
RW
465 (and (string-prefix? "r-" (package-name package))
466 (match (and=> (package-source package) origin-uri)
467 ((? string? uri)
468 (predicate uri))
469 ((? list? uris)
470 (any predicate uris))
471 (_ #f)))))
472
473(define (bioconductor-data-package? package)
474 "Return true if PACKAGE is an R data package from Bioconductor."
475 (let ((predicate (lambda (uri)
5713bbf1 476 (and (string-prefix? "https://bioconductor.org" uri)
5ae63362
RW
477 (string-contains uri "/data/annotation/")))))
478 (and (string-prefix? "r-" (package-name package))
479 (match (and=> (package-source package) origin-uri)
480 ((? string? uri)
481 (predicate uri))
482 ((? list? uris)
483 (any predicate uris))
484 (_ #f)))))
d882c235 485
daaa270e
RW
486(define (bioconductor-experiment-package? package)
487 "Return true if PACKAGE is an R experiment package from Bioconductor."
488 (let ((predicate (lambda (uri)
5713bbf1 489 (and (string-prefix? "https://bioconductor.org" uri)
daaa270e
RW
490 (string-contains uri "/data/experiment/")))))
491 (and (string-prefix? "r-" (package-name package))
492 (match (and=> (package-source package) origin-uri)
493 ((? string? uri)
494 (predicate uri))
495 ((? list? uris)
496 (any predicate uris))
497 (_ #f)))))
498
d882c235 499(define %cran-updater
7e6b490d
AK
500 (upstream-updater
501 (name 'cran)
502 (description "Updater for CRAN packages")
503 (pred cran-package?)
d0bd632f
RW
504 (latest latest-cran-release)))
505
506(define %bioconductor-updater
507 (upstream-updater
508 (name 'bioconductor)
509 (description "Updater for Bioconductor packages")
510 (pred bioconductor-package?)
511 (latest latest-bioconductor-release)))
d882c235
LC
512
513;;; cran.scm ends here