gnu: r-rgraphviz: Move to (gnu packages bioconductor).
[jackhill/guix/guix.git] / guix / import / pypi.scm
CommitLineData
1b3e9685
DT
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2014 David Thompson <davet@gnu.org>
d1cb7e95 3;;; Copyright © 2015 Cyril Roelandt <tipecaml@gmail.com>
7657e61d 4;;; Copyright © 2015, 2016, 2017, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
22fc581d 5;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
e37f8894 6;;; Copyright © 2018 Ricardo Wurmus <rekado@elephly.net>
d514276b 7;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
9d0dfd9a 8;;; Copyright © 2020 Jakub Kądziołka <kuba@kadziolka.net>
c8abbe14 9;;; Copyright © 2020 Lars-Dominik Braun <ldb@leibniz-psychology.org>
a2daee84 10;;; Copyright © 2020 Arun Isaac <arunisaac@systemreboot.net>
1b3e9685
DT
11;;;
12;;; This file is part of GNU Guix.
13;;;
14;;; GNU Guix is free software; you can redistribute it and/or modify it
15;;; under the terms of the GNU General Public License as published by
16;;; the Free Software Foundation; either version 3 of the License, or (at
17;;; your option) any later version.
18;;;
19;;; GNU Guix is distributed in the hope that it will be useful, but
20;;; WITHOUT ANY WARRANTY; without even the implied warranty of
21;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22;;; GNU General Public License for more details.
23;;;
24;;; You should have received a copy of the GNU General Public License
25;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
26
27(define-module (guix import pypi)
1b3e9685 28 #:use-module (ice-9 match)
1b3e9685 29 #:use-module (ice-9 regex)
ac906cb7 30 #:use-module (ice-9 receive)
ff986890 31 #:use-module ((ice-9 rdelim) #:select (read-line))
1b3e9685 32 #:use-module (srfi srfi-1)
d514276b 33 #:use-module (srfi srfi-11)
ff986890 34 #:use-module (srfi srfi-26)
85dce718
LC
35 #:use-module (srfi srfi-34)
36 #:use-module (srfi srfi-35)
1b3e9685 37 #:use-module (guix utils)
6a79eed9
LC
38 #:use-module (guix memoization)
39 #:use-module (guix diagnostics)
40 #:use-module (guix i18n)
8173ceee
LC
41 #:use-module ((guix build utils)
42 #:select ((package-name->name+version
e37f8894 43 . hyphen-package-name->name+version)
c799ad72
MC
44 find-files
45 invoke))
1b3e9685 46 #:use-module (guix import utils)
bab020d7 47 #:use-module ((guix download) #:prefix download:)
1ff2619b 48 #:use-module (guix import json)
b5eb901a 49 #:use-module (json)
1b3e9685 50 #:use-module (guix packages)
bab020d7 51 #:use-module (guix upstream)
263ac57f 52 #:use-module ((guix licenses) #:prefix license:)
1b3e9685 53 #:use-module (guix build-system python)
c4797121 54 #:export (parse-requires.txt
f0190a5d 55 parse-wheel-metadata
803fb336 56 specification->requirement-name
c4797121 57 guix-package->pypi-name
ac906cb7 58 pypi-recursive-import
8173ceee 59 pypi->guix-package
bab020d7 60 %pypi-updater))
1b3e9685 61
7657e61d
LC
62;; The PyPI API (notice the rhyme) is "documented" at:
63;; <https://warehouse.readthedocs.io/api-reference/json/>.
64
(define (non-empty-string-or-false value)
  "Return VALUE when it is a non-empty string.  Return #f when VALUE is the
empty string, the symbol 'null, or #f."
  (match value
    ((or "" 'null #f) #f)
    ((? string? text) text)))
7657e61d
LC
70
;; PyPI project: the top-level JSON object fetched for one package.
(define-json-mapping <pypi-project> make-pypi-project pypi-project?
  json->pypi-project
  (info          pypi-project-info "info" json->project-info) ;<project-info>
  (last-serial   pypi-project-last-serial "last_serial")      ;integer
  (releases      pypi-project-releases "releases" ;string/<distribution>* pairs
                 ;; Turn each (VERSION . VECTOR-OF-FILES) entry into
                 ;; (VERSION . LIST-OF-<distribution>).
                 (match-lambda
                   (((release-names . file-vectors) ...)
                    (map (lambda (release-name files)
                           (cons release-name
                                 (map json->distribution
                                      (vector->list files))))
                         release-names file-vectors))))
  (distributions pypi-project-distributions "urls" ;<distribution>*
                 (lambda (files)
                   (map json->distribution (vector->list files)))))
87
;; Project metadata: the "info" member of a PyPI project.
(define-json-mapping <project-info> make-project-info project-info?
  json->project-info
  (name         project-info-name)                    ;string
  (author       project-info-author)                  ;string
  (maintainer   project-info-maintainer)              ;string
  (classifiers  project-info-classifiers              ;list of strings
                "classifiers" vector->list)
  (description  project-info-description)             ;string
  (summary      project-info-summary)                 ;string
  (keywords     project-info-keywords)                ;string
  (license      project-info-license)                 ;string
  (download-url project-info-download-url             ;string | #f
                "download_url" non-empty-string-or-false)
  (home-page    project-info-home-page                ;string
                "home_page")
  (url          project-info-url "project_url")       ;string
  (release-url  project-info-release-url "release_url") ;string
  (version      project-info-version))                ;string
107
;; Distribution: a URL along with cryptographic hashes and metadata.
(define-json-mapping <distribution> make-distribution distribution?
  json->distribution
  (url            distribution-url)                  ;string
  (digests        distribution-digests)              ;list of string pairs
  (file-name      distribution-file-name "filename") ;string
  ;; The PyPI JSON API names this key "has_sig"; the former "hash_sig"
  ;; spelling never matched any key in the JSON object.
  (has-signature? distribution-has-signature? "has_sig") ;Boolean
  (package-type   distribution-package-type "packagetype") ;"bdist_wheel" | ...
  (python-version distribution-package-python-version
                  "python_version"))
118
(define (pypi-fetch name)
  "Fetch the JSON description of package NAME from pypi.org and return it as
a <pypi-project> record, or #f on failure."
  (let* ((url  (string-append "https://pypi.org/pypi/" name "/json"))
         (json (json-fetch url)))
    (and json
         (json->pypi-project json))))
1b3e9685 123
85dce718
LC
;; Condition raised for packages found on PyPI that lack a source
;; distribution.  Its PACKAGE field holds the offending <pypi-project>.
(define-condition-type &missing-source-error &error
  missing-source-error?
  (package missing-source-error-package))
128
1b3e9685
DT
(define (latest-source-release pypi-package)
  "Return the source distribution (\"sdist\") of the latest release of
PYPI-PACKAGE.  Raise a '&missing-source-error' condition when that release has
no source distribution."
  (define (source-distribution? distribution)
    (string=? "sdist" (distribution-package-type distribution)))

  (let* ((latest-version (project-info-version
                          (pypi-project-info pypi-package)))
         (candidates     (assoc-ref (pypi-project-releases pypi-package)
                                    latest-version)))
    (or (find source-distribution? candidates)
        (raise (condition (&missing-source-error
                           (package pypi-package)))))))
1b3e9685 139
266785d2
CR
(define (latest-wheel-release pypi-package)
  "Return the wheel distribution (a <distribution> record whose package type
is \"bdist_wheel\") for the latest release of PYPI-PACKAGE, or #f if there
isn't any.  Use 'distribution-url' on the result to obtain its URL."
  (let ((releases (assoc-ref (pypi-project-releases pypi-package)
                             (project-info-version
                              (pypi-project-info pypi-package)))))
    ;; 'find' already returns #f when nothing matches.
    (find (lambda (release)
            (string=? "bdist_wheel" (distribution-package-type release)))
          releases)))
150
ff986890
CR
(define (python->package-name name)
  "Given the NAME of a package on PyPI, return a Guix-compliant name for the
package: the snake-cased NAME, prefixed with \"python-\" unless NAME already
starts with that prefix."
  (let ((guix-name (snake-case name)))
    (if (string-prefix? "python-" name)
        guix-name
        (string-append "python-" guix-name))))
157
(define (guix-package->pypi-name package)
  "Given a Python PACKAGE built from pypi.org, return the name of the
package on PyPI, or #f when PACKAGE has no source."
  (define (url->pypi-name url)
    ;; Derive the name from the last component of URL, minus its final
    ;; extension.
    (hyphen-package-name->name+version
     (basename (file-sans-extension url))))

  (let ((uri (and=> (package-source package) origin-uri)))
    (match uri
      (#f #f)
      ((? string? url)
       (url->pypi-name url))
      ((urls ...)
       (any url->pypi-name urls)))))
bab020d7 171
266785d2
CR
(define (wheel-url->extracted-directory wheel-url)
  "Return the name of the \".dist-info\" directory for the wheel at WHEEL-URL,
built from the first two hyphen-separated parts of its file name."
  (let ((parts (string-split (basename wheel-url) #\-)))
    (match parts
      ((name version . _)
       (string-append name "-" version ".dist-info")))))
176
(define (maybe-inputs package-inputs input-type)
  "Given a list of PACKAGE-INPUTS, return a one-element list holding an
'inputs'-like field for a package definition, or the empty list when
PACKAGE-INPUTS is empty.  INPUT-TYPE, a symbol, is the name given to the
field."
  (match package-inputs
    (()
     '())
    ((inputs ...)
     ;; Build (INPUT-TYPE (quasiquote INPUTS)) without quasiquote syntax.
     (list (list input-type
                 (list 'quasiquote inputs))))))
ff986890 186
803fb336
MC
(define %requirement-name-regexp
  ;; Regexp to match the requirement name in a requirement specification.

  ;; Some grammar, taken from PEP-0508 (see:
  ;; https://www.python.org/dev/peps/pep-0508/).

  ;; Using this grammar makes the PEP-0508 regexp easier to understand for
  ;; humans.  The use of a regexp is preferred to more primitive string
  ;; manipulations because we can more directly match what upstream uses
  ;; (again, per PEP-0508).  The regexp approach is also easier to extend,
  ;; should we want to implement more completely the grammar of PEP-0508.

  ;; The unified rule can be expressed as:
  ;; specification = wsp* ( url_req | name_req ) wsp*

  ;; where url_req is:
  ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?

  ;; and where name_req is:
  ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?

  ;; Thus, we only need to match NAME, which is expressed as:
  ;; identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
  ;; identifier = letterOrDigit identifier_end*
  ;; name = identifier
  (let* ((alphanumeric "[A-Za-z0-9]")
         (tail         (string-append "(" alphanumeric "|"
                                      "[-_.]*" alphanumeric ")")))
    ;; The pattern is anchored at the start of the specification.
    (make-regexp (string-append "^" alphanumeric tail "*"))))
218
(define (specification->requirement-name spec)
  "Given a specification SPEC, return the requirement name.  Leading
whitespace in SPEC is ignored, as allowed by the PEP-0508 grammar
(specification = wsp* ( url_req | name_req ) wsp*).  Raise an error when no
requirement name can be extracted."
  (match:substring
   ;; The regexp is anchored at the start of the string, so leading
   ;; whitespace must be stripped first.
   (or (regexp-exec %requirement-name-regexp (string-trim spec))
       (error (G_ "Could not extract requirement name in spec:") spec))))
c4797121 224
d514276b
MC
(define (test-section? name)
  "Return a true value if the section NAME contains 'test' or 'dev',
ignoring case, and #f otherwise.  The true value is the index of the match
within NAME."
  (or (string-contains-ci name "test")
      (string-contains-ci name "dev")))
229
(define (parse-requires.txt requires.txt)
  "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of lists
of requirements.

The first list contains the required dependencies while the second the
optional test dependencies.  Note that currently, optional, non-test
dependencies are omitted since these can be difficult or expensive to
satisfy.  Comments and blank lines, including whitespace-only ones, are
ignored."

  (define (blank? line)
    ;; Return #t if the given LINE is empty or made only of whitespace (for
    ;; instance a lone carriage return left over from CRLF line endings).
    (string-null? (string-trim-both line)))

  (define (comment? line)
    ;; Return #t if the given LINE is a comment, #f otherwise.
    (string-prefix? "#" (string-trim line)))

  (define (section-header? line)
    ;; Return #t if the given LINE is a section header, #f otherwise.
    (string-prefix? "[" (string-trim line)))

  (define (requirement-name line)
    ;; Leading whitespace is tolerated for comments and section headers, so
    ;; tolerate it for requirement lines too.
    (specification->requirement-name (string-trim line)))

  (call-with-input-file requires.txt
    (lambda (port)
      (let loop ((required-deps '())
                 (test-deps '())
                 (inside-test-section? #f)
                 (optional? #f))
        (let ((line (read-line port)))
          (cond
           ((eof-object? line)
            ;; Duplicates can occur, since the same requirement can be
            ;; listed multiple times with different conditional markers, e.g.
            ;; pytest >= 3 ; python_version >= "3.3"
            ;; pytest < 3 ; python_version < "3.3"
            (map (compose reverse delete-duplicates)
                 (list required-deps test-deps)))
           ((or (blank? line) (comment? line))
            (loop required-deps test-deps inside-test-section? optional?))
           ((section-header? line)
            ;; Encountering a section means that all the requirements
            ;; listed below are optional.  Since we want to pick only the
            ;; test dependencies from the optional dependencies, we must
            ;; track those separately.
            (loop required-deps test-deps (test-section? line) #t))
           (inside-test-section?
            (loop required-deps
                  (cons (requirement-name line) test-deps)
                  inside-test-section? optional?))
           (optional?
            ;; Skip optional, non-test items.
            (loop required-deps test-deps inside-test-section? optional?))
           (else
            ;; No section header seen yet: this is a required dependency.
            (loop (cons (requirement-name line) required-deps)
                  test-deps inside-test-section? optional?))))))))
f0190a5d
MC
285
(define (parse-wheel-metadata metadata)
  "Given METADATA, a Wheel metadata file, return a list of lists of
requirements.

The first list contains the required dependencies, taken from the
'Requires-Dist' headers that carry no 'extra' marker; the second contains the
test dependencies, taken from the headers whose 'extra' marker names a test
section.  Requirements for any other extra are omitted.  Refer to the
documentation of PARSE-REQUIRES.TXT for more on the returned value."
  ;; METADATA is a RFC-2822-like, header based file.

  (define (requires-dist-header? line)
    ;; Return true if the given LINE is a Requires-Dist header.
    (string-match "^Requires-Dist: " line))

  (define (requires-dist-value line)
    (string-drop line (string-length "Requires-Dist: ")))

  (define (extra? line)
    ;; Return a match object if the given LINE is an "extra" requirement, #f
    ;; otherwise.  Marker strings may be written with single or double
    ;; quotes, so accept both; otherwise requirements such as
    ;; 'extra == "test"' would be mistaken for required dependencies.
    (string-match "extra == ['\"](.*)['\"]" line))

  (define (test-requirement? line)
    ;; Return true if LINE is an "extra" requirement for a test section.
    (and=> (extra? line)
           (lambda (extra-match)
             (test-section? (match:substring extra-match 1)))))

  (define (requirement-name line)
    (specification->requirement-name (requires-dist-value line)))

  (call-with-input-file metadata
    (lambda (port)
      (let loop ((required-deps '())
                 (test-deps '()))
        (let ((line (read-line port)))
          (cond
           ((eof-object? line)
            (map (compose reverse delete-duplicates)
                 (list required-deps test-deps)))
           ((not (requires-dist-header? line))
            (loop required-deps test-deps))          ;skip line
           ((not (extra? line))
            (loop (cons (requirement-name line) required-deps)
                  test-deps))
           ((test-requirement? line)
            (loop required-deps
                  (cons (requirement-name line) test-deps)))
           (else
            (loop required-deps test-deps))))))))    ;skip non-test extra
c4797121 328
(define (guess-requirements source-url wheel-url archive)
  "Given SOURCE-URL, WHEEL-URL and an ARCHIVE of the package, return a list
of two lists: the required dependencies and the test dependencies.  They are
read from the METADATA file of the wheel at WHEEL-URL when it can be fetched
and extracted, and otherwise from the \"requires.txt\" file found in ARCHIVE,
which is extracted in a temporary directory."

  (define (call-silently thunk)
    ;; Call THUNK with the current output and error ports discarded.
    (parameterize ((current-error-port (%make-void-port "rw+"))
                   (current-output-port (%make-void-port "rw+")))
      (thunk)))

  (define (read-wheel-metadata wheel-archive)
    ;; Given WHEEL-ARCHIVE, a ZIP Python wheel archive, return the package's
    ;; requirements, or #f if the metadata file contained therein couldn't be
    ;; extracted.
    (let ((metadata (string-append (wheel-url->extracted-directory wheel-url)
                                   "/METADATA")))
      (call-with-temporary-directory
       (lambda (dir)
         (let ((status (call-silently
                        (lambda ()
                          (system* "unzip" wheel-archive "-d" dir
                                   metadata)))))
           (cond
            ((zero? status)
             (parse-wheel-metadata (string-append dir "/" metadata)))
            (else
             (warning
              (G_ "Failed to extract file: ~a from wheel.~%") metadata)
             #f)))))))

  (define (guess-requirements-from-wheel)
    ;; Return the package's requirements using the wheel, or #f if an error
    ;; occurs.
    (call-with-temporary-output-file
     (lambda (temp port)
       (and wheel-url
            (url-fetch wheel-url temp)
            (read-wheel-metadata temp)))))

  (define (extract-source dir)
    ;; Unpack ARCHIVE into DIR, choosing the tool from SOURCE-URL's extension.
    (call-silently
     (lambda ()
       (if (string=? "zip" (file-extension source-url))
           (invoke "unzip" archive "-d" dir)
           (invoke "tar" "xf" archive "-C" dir)))))

  (define (guess-requirements-from-source)
    ;; Return the package's requirements by guessing them from the source.
    (cond
     ((compressed-file? source-url)
      (call-with-temporary-directory
       (lambda (dir)
         (extract-source dir)
         (match (find-files dir
                            (lambda (abs-file-name _)
                              (string-match "\\.egg-info/requires.txt$"
                                            abs-file-name)))
           (()
            (warning (G_ "Cannot guess requirements from source archive:\
 no requires.txt file found.~%"))
            (list '() '()))
           ((first-file . _)
            (parse-requires.txt first-file))))))
     (else
      (warning (G_ "Unsupported archive format; \
cannot determine package dependencies from source archive: ~a~%")
               (basename source-url))
      (list '() '()))))

  ;; First, try to compute the requirements using the wheel, else, fallback to
  ;; reading the "requires.txt" from the egg-info directory from the source
  ;; archive.
  (or (guess-requirements-from-wheel)
      (guess-requirements-from-source)))
393
(define (compute-inputs source-url wheel-url archive)
  "Given the SOURCE-URL and WHEEL-URL of an already downloaded ARCHIVE, return
two values.  The first is a list of two lists of name/variable pairs, for the
propagated inputs and the native inputs, respectively.  The second is the
unaltered list of upstream dependency names."

  (define (requirement->input requirement)
    ;; Return the ("python-foo" ,python-foo) form for REQUIREMENT.
    (let ((guix-name (python->package-name requirement)))
      (list guix-name
            (list 'unquote (string->symbol guix-name)))))

  (define (input<? input1 input2)
    ;; Order inputs by their Guix name, ignoring case.
    (string-ci<? (first input1) (first input2)))

  (define (process-requirements requirements)
    ;; Drop "argparse", then convert to inputs and sort them.
    (sort (map requirement->input
               (remove (cut string=? "argparse" <>) requirements))
          input<?))

  (let ((dependencies (guess-requirements source-url wheel-url archive)))
    (values (map process-requirements dependencies)
            (concatenate dependencies))))
1b3e9685 420
(define (make-pypi-sexp name version source-url wheel-url home-page synopsis
                        description license)
  "Return the `package' s-expression for a python package with the given NAME,
VERSION, SOURCE-URL, WHEEL-URL, HOME-PAGE, SYNOPSIS, DESCRIPTION, and LICENSE.
The list of upstream dependency names is returned as a second value.  Return
#f when SOURCE-URL cannot be fetched."
  (call-with-temporary-output-file
   (lambda (temp port)
     (and (url-fetch source-url temp)
          (receive (guix-dependencies upstream-dependencies)
              (compute-inputs source-url wheel-url temp)
            (match guix-dependencies
              ((required-inputs test-inputs)
               (let* ((zip?          (string-suffix? ".zip" source-url))
                      ;; Zip sources need 'unzip' as a native input.
                      (native-inputs (if zip?
                                         (cons '("unzip" ,unzip) test-inputs)
                                         test-inputs))
                      ;; PyPI URL are case sensitive, but sometimes a
                      ;; project named using mixed case has a URL using
                      ;; lower case, so we must work around this
                      ;; inconsistency.  For actual examples, compare the
                      ;; URLs of the "Deprecated" and "uWSGI" PyPI packages.
                      (uri-name      (if (string-contains source-url name)
                                         name
                                         (string-downcase name)))
                      ;; Some packages have been released as `.zip` instead
                      ;; of the more common `.tar.gz`.  For example, see
                      ;; "path-and-address".
                      (uri-extension (if zip? '(".zip") '())))
                 (values
                  `(package
                     (name ,(python->package-name name))
                     (version ,version)
                     (source
                      (origin
                        (method url-fetch)
                        (uri (pypi-uri ,uri-name version ,@uri-extension))
                        (sha256
                         (base32
                          ,(guix-hash-url temp)))))
                     (build-system python-build-system)
                     ,@(maybe-inputs required-inputs 'propagated-inputs)
                     ,@(maybe-inputs native-inputs 'native-inputs)
                     (home-page ,home-page)
                     (synopsis ,synopsis)
                     (description ,description)
                     (license ,(license->symbol license)))
                  upstream-dependencies)))))))))
1b3e9685 471
ac906cb7
RW
(define pypi->guix-package
  (memoize
   (lambda* (package-name)
     "Fetch the metadata for PACKAGE-NAME from pypi.org, and return the
`package' s-expression corresponding to that package, or #f on failure.
Report an error and exit via 'leave' when the latest release has no source
distribution."
     (let* ((project (pypi-fetch package-name))
            (info    (and project (pypi-project-info project))))
       (and project
            (guard (c ((missing-source-error? c)
                       (leave (G_ "no source release for pypi package ~a ~a~%")
                              (project-info-name info)
                              (project-info-version info))))
              (make-pypi-sexp (project-info-name info)
                              (project-info-version info)
                              (and=> (latest-source-release project)
                                     distribution-url)
                              (and=> (latest-wheel-release project)
                                     distribution-url)
                              (project-info-home-page info)
                              ;; NOTE(review): the summary is passed as both
                              ;; the synopsis and the description -- confirm
                              ;; this is intentional.
                              (project-info-summary info)
                              (project-info-summary info)
                              (string->license
                               (project-info-license info)))))))))
ac906cb7
RW
496
(define (pypi-recursive-import package-name)
  "Call 'recursive-import' for PACKAGE-NAME, converting each package with
'pypi->guix-package' and naming it with 'python->package-name'."
  (define (repo->guix-package name repo)
    ;; REPO is ignored.
    (pypi->guix-package name))

  (recursive-import package-name #f
                    #:repo->guix-package repo->guix-package
                    #:guix-name python->package-name))
bab020d7 502
263ac57f
DC
(define (string->license str)
  "Convert the string STR into a license object, or return #f when STR is
not one of the recognized license names."
  (define known-licenses
    ;; Association list from upstream license strings to license objects.
    `(("GNU LGPL"                    . ,license:lgpl2.0)
      ("GPL"                         . ,license:gpl3)
      ("BSD"                         . ,license:bsd-3)
      ("BSD-3"                       . ,license:bsd-3)
      ("BSD License"                 . ,license:bsd-3)
      ("BSD-2-Clause"                . ,license:bsd-2)
      ("MIT"                         . ,license:expat)
      ("MIT license"                 . ,license:expat)
      ("MIT License"                 . ,license:expat)
      ("Expat license"               . ,license:expat)
      ("Public domain"               . ,license:public-domain)
      ("Apache License, Version 2.0" . ,license:asl2.0)
      ("Apache 2.0"                  . ,license:asl2.0)
      ("MPL 2.0"                     . ,license:mpl2.0)))

  (assoc-ref known-licenses str))
515
00290e73
LC
(define pypi-package?
  ;; Predicate built with 'url-predicate' from a test that is true for URLs
  ;; starting with one of the PyPI prefixes below.  Note that the third
  ;; prefix is already covered by the first one.
  (url-predicate
   (lambda (url)
     (any (cut string-prefix? <> url)
          '("https://pypi.org/"
            "https://pypi.python.org/"
            "https://pypi.org/packages"
            "https://files.pythonhosted.org/packages")))))
bab020d7 523
7d27a025
LC
(define (latest-release package)
  "Return an <upstream-source> for the latest release of PACKAGE, or #f when
its PyPI metadata cannot be fetched or the latest release has no source
distribution."
  (let* ((pypi-name (guix-package->pypi-name package))
         (project   (pypi-fetch pypi-name)))
    (and project
         (guard (c ((missing-source-error? c) #f))
           (let ((source-url     (distribution-url
                                  (latest-source-release project)))
                 (latest-version (project-info-version
                                  (pypi-project-info project))))
             (upstream-source
              (package (package-name package))
              (version latest-version)
              (urls (list source-url))))))))
bab020d7
CR
538
(define %pypi-updater
  ;; Updater for packages selected by 'pypi-package?'; the latest release is
  ;; computed by 'latest-release'.
  (upstream-updater
   (name 'pypi)
   (description "Updater for PyPI packages")
   (pred pypi-package?)
   (latest latest-release)))