gnu: Add r-all.
[jackhill/guix/guix.git] / guix / import / pypi.scm
CommitLineData
1b3e9685
DT
1;;; GNU Guix --- Functional package management for GNU
2;;; Copyright © 2014 David Thompson <davet@gnu.org>
d1cb7e95 3;;; Copyright © 2015 Cyril Roelandt <tipecaml@gmail.com>
4eaac4b7 4;;; Copyright © 2015, 2016, 2017 Ludovic Courtès <ludo@gnu.org>
22fc581d 5;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
e37f8894 6;;; Copyright © 2018 Ricardo Wurmus <rekado@elephly.net>
d514276b 7;;; Copyright © 2019 Maxim Cournoyer <maxim.cournoyer@gmail.com>
1b3e9685
DT
8;;;
9;;; This file is part of GNU Guix.
10;;;
11;;; GNU Guix is free software; you can redistribute it and/or modify it
12;;; under the terms of the GNU General Public License as published by
13;;; the Free Software Foundation; either version 3 of the License, or (at
14;;; your option) any later version.
15;;;
16;;; GNU Guix is distributed in the hope that it will be useful, but
17;;; WITHOUT ANY WARRANTY; without even the implied warranty of
18;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19;;; GNU General Public License for more details.
20;;;
21;;; You should have received a copy of the GNU General Public License
22;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
23
24(define-module (guix import pypi)
1b3e9685 25 #:use-module (ice-9 match)
1b3e9685 26 #:use-module (ice-9 regex)
ac906cb7 27 #:use-module (ice-9 receive)
ff986890 28 #:use-module ((ice-9 rdelim) #:select (read-line))
1b3e9685 29 #:use-module (srfi srfi-1)
d514276b 30 #:use-module (srfi srfi-11)
ff986890 31 #:use-module (srfi srfi-26)
85dce718
LC
32 #:use-module (srfi srfi-34)
33 #:use-module (srfi srfi-35)
ff986890 34 #:use-module (guix ui)
1b3e9685 35 #:use-module (guix utils)
8173ceee
LC
36 #:use-module ((guix build utils)
37 #:select ((package-name->name+version
e37f8894 38 . hyphen-package-name->name+version)
c799ad72
MC
39 find-files
40 invoke))
1b3e9685 41 #:use-module (guix import utils)
bab020d7 42 #:use-module ((guix download) #:prefix download:)
1ff2619b 43 #:use-module (guix import json)
1b3e9685 44 #:use-module (guix packages)
bab020d7 45 #:use-module (guix upstream)
263ac57f 46 #:use-module ((guix licenses) #:prefix license:)
1b3e9685 47 #:use-module (guix build-system python)
c4797121 48 #:export (parse-requires.txt
f0190a5d 49 parse-wheel-metadata
803fb336 50 specification->requirement-name
c4797121 51 guix-package->pypi-name
ac906cb7 52 pypi-recursive-import
8173ceee 53 pypi->guix-package
bab020d7 54 %pypi-updater))
1b3e9685 55
(define (pypi-fetch name)
  "Fetch the JSON metadata published on pypi.org for the package NAME and
return it as an alist, or #f on failure."
  (let ((metadata-url (string-append "https://pypi.org/pypi/" name "/json")))
    (json-fetch-alist metadata-url)))
1b3e9685 60
85dce718
LC
;; Error condition for packages found on PyPI that lack a source
;; distribution.  Its 'package' field holds the value supplied by whoever
;; raises the condition.
(define-condition-type &missing-source-error
  &error
  missing-source-error?
  (package missing-source-error-package))
65
1b3e9685
DT
(define (latest-source-release pypi-package)
  "Return the source ('sdist') release entry for the latest version of
PYPI-PACKAGE.  Raise a '&missing-source-error' condition carrying
PYPI-PACKAGE when that version has no source release."
  (define (sdist? release)
    (string=? "sdist" (assoc-ref release "packagetype")))

  (let* ((latest-version (assoc-ref* pypi-package "info" "version"))
         (releases (assoc-ref* pypi-package "releases" latest-version))
         (sdist (find sdist? releases)))
    (if sdist
        sdist
        (raise (condition (&missing-source-error
                           (package pypi-package)))))))
1b3e9685 75
266785d2
CR
(define (latest-wheel-release pypi-package)
  "Return the release entry (an alist) describing the wheel of the latest
version of PYPI-PACKAGE, or #f if that version has no 'bdist_wheel' file.
The download location is stored under the \"url\" key of the returned entry."
  (let ((releases (assoc-ref* pypi-package "releases"
                              (assoc-ref* pypi-package "info" "version"))))
    ;; 'find' already returns #f when no element satisfies the predicate, so
    ;; no explicit fallback value is needed.
    (find (lambda (release)
            (string=? "bdist_wheel" (assoc-ref release "packagetype")))
          releases)))
85
ff986890
CR
(define (python->package-name name)
  "Return a Guix-compliant package name for NAME, the name of a package on
PyPI: the snake-cased NAME, prefixed with \"python-\" unless NAME already
starts with that prefix."
  (let ((guix-name (snake-case name)))
    (if (string-prefix? "python-" name)
        guix-name
        (string-append "python-" guix-name))))
92
(define (guix-package->pypi-name package)
  "Return the name on PyPI of PACKAGE, a Python package built from pypi.org.
The name is derived from the file name of the source URI; #f is returned when
PACKAGE has no source."
  (define (url->pypi-name url)
    ;; Strip the directory part and the extension from URL, then split off
    ;; the version that follows the name.
    (let ((file-name (basename (file-sans-extension url))))
      (hyphen-package-name->name+version file-name)))

  (let ((uri (and=> (package-source package) origin-uri)))
    (match uri
      (#f #f)
      ((? string?)
       (url->pypi-name uri))
      ((urls ...)
       (any url->pypi-name urls)))))
bab020d7 106
266785d2
CR
(define (wheel-url->extracted-directory wheel-url)
  "Return the name of the '.dist-info' directory corresponding to WHEEL-URL,
built from the first two hyphen-separated fields of its file name."
  (let ((fields (string-split (basename wheel-url) #\-)))
    (match fields
      ((distribution release . _)
       (string-append (string-join (list distribution release) "-")
                      ".dist-info")))))
111
(define (maybe-inputs package-inputs input-type)
  "Return a list holding the input field of a package definition for
PACKAGE-INPUTS, or the empty list when PACKAGE-INPUTS is empty.  INPUT-TYPE,
a symbol, is used as the name of the field."
  (match package-inputs
    (()
     '())
    ((inputs ...)
     ;; Wrap the inputs in a literal 'quasiquote' form so that the generated
     ;; field reads (INPUT-TYPE `(...)).
     (list (list input-type (list 'quasiquote inputs))))))
ff986890 121
803fb336
MC
(define %requirement-name-regexp
  ;; Regexp matching the requirement name at the start of a requirement
  ;; specification.

  ;; The grammar below is taken from PEP-0508 (see:
  ;; https://www.python.org/dev/peps/pep-0508/).

  ;; Following this grammar makes the PEP-0508 regexp easier to understand
  ;; for humans.  A regexp is preferred to more primitive string
  ;; manipulations because it lets us match more directly what upstream uses
  ;; (again, per PEP-0508).  The regexp approach is also easier to extend,
  ;; should we want to implement the grammar of PEP-0508 more completely.

  ;; The unified rule can be expressed as:
  ;; specification = wsp* ( url_req | name_req ) wsp*

  ;; where url_req is:
  ;; url_req = name wsp* extras? wsp* urlspec wsp+ quoted_marker?

  ;; and where name_req is:
  ;; name_req = name wsp* extras? wsp* versionspec? wsp* quoted_marker?

  ;; Thus, we only need to match NAME, which is expressed as:
  ;; identifier_end = letterOrDigit | (('-' | '_' | '.' )* letterOrDigit)
  ;; identifier = letterOrDigit identifier_end*
  ;; name = identifier
  (let* ((alphanumeric "[A-Za-z0-9]")
         (separators "[-_.]*")
         (identifier-end (string-append "(" alphanumeric "|"
                                        separators alphanumeric ")"))
         (pattern (string-append "^" alphanumeric identifier-end "*")))
    (make-regexp pattern)))
153
(define (specification->requirement-name spec)
  "Given a specification SPEC, return the requirement name.  Leading
whitespace in SPEC is ignored, since the PEP 508 grammar allows it before the
name.  Raise an error if no requirement name can be extracted from SPEC."
  ;; The name regexp is anchored at the start of the string, so leading
  ;; whitespace must be removed before matching.
  (let ((name-match (regexp-exec %requirement-name-regexp
                                 (string-trim spec))))
    (unless name-match
      (error (G_ "Could not extract requirement name in spec:") spec))
    (match:substring name-match)))
c4797121 159
d514276b
MC
(define (test-section? name)
  "Return a true value if the section NAME contains 'test' or 'dev',
regardless of case, and #f otherwise."
  (or (string-contains-ci name "test")
      (string-contains-ci name "dev")))
164
(define (parse-requires.txt requires.txt)
  "Given REQUIRES.TXT, a Setuptools requires.txt file, return a list of lists
of requirements.

The first list contains the required dependencies while the second the
optional test dependencies.  Note that currently, optional, non-test
dependencies are omitted since these can be difficult or expensive to
satisfy.  Blank lines, including lines made only of whitespace, and comment
lines are ignored."

  (define (blank? line)
    ;; Return #t if the given LINE is empty or contains only whitespace, such
    ;; as the lone carriage return left by CRLF line endings.
    (string-null? (string-trim-both line)))

  (define (comment? line)
    ;; Return #t if the given LINE is a comment, #f otherwise.
    (string-prefix? "#" (string-trim line)))

  (define (section-header? line)
    ;; Return #t if the given LINE is a section header, #f otherwise.
    (string-prefix? "[" (string-trim line)))

  (call-with-input-file requires.txt
    (lambda (port)
      (let loop ((required-deps '())
                 (test-deps '())
                 (inside-test-section? #f)
                 (optional? #f))
        (let ((line (read-line port)))
          (cond
           ((eof-object? line)
            ;; Duplicates can occur, since the same requirement can be
            ;; listed multiple times with different conditional markers, e.g.
            ;; pytest >= 3 ; python_version >= "3.3"
            ;; pytest < 3 ; python_version < "3.3"
            (map (compose reverse delete-duplicates)
                 (list required-deps test-deps)))
           ((or (blank? line) (comment? line))
            (loop required-deps test-deps inside-test-section? optional?))
           ((section-header? line)
            ;; Encountering a section means that all the requirements
            ;; listed below are optional.  Since we want to pick only the
            ;; test dependencies from the optional dependencies, we must
            ;; track those separately.
            (loop required-deps test-deps (test-section? line) #t))
           (inside-test-section?
            (loop required-deps
                  (cons (specification->requirement-name line)
                        test-deps)
                  inside-test-section? optional?))
           (optional?
            ;; Skip optional items that are not test dependencies.
            (loop required-deps test-deps inside-test-section? optional?))
           (else
            ;; No section header was seen yet: this is a plain requirement.
            (loop (cons (specification->requirement-name line)
                        required-deps)
                  test-deps inside-test-section? optional?))))))))
f0190a5d
MC
220
(define (parse-wheel-metadata metadata)
  "Given METADATA, a Wheel metadata file, return a list of lists of
requirements.

Refer to the documentation of PARSE-REQUIRES.TXT for a description of the
returned value."
  ;; METADATA is a RFC-2822-like, header based file.

  (define (requires-dist-header? line)
    ;; Return true if the given LINE is a Requires-Dist header.
    (string-match "^Requires-Dist: " line))

  (define (requires-dist-value line)
    ;; Return the part of LINE that follows the "Requires-Dist: " prefix.
    (string-drop line (string-length "Requires-Dist: ")))

  (define (extra-name line)
    ;; Return the name of the "extra" that the requirement on LINE is
    ;; conditioned on, or #f if LINE has no such marker.  PEP 508 allows
    ;; marker strings to use either single or double quotes, and optional
    ;; whitespace around the operator, so all these forms are accepted.
    (and=> (string-match
            "extra[[:space:]]*==[[:space:]]*['\"]([^'\"]*)['\"]" line)
           (cut match:substring <> 1)))

  (define (test-requirement? line)
    ;; Return true if LINE is conditioned on an extra whose name denotes a
    ;; test section.  Lines without an "extra" marker yield #f.
    (and=> (extra-name line) test-section?))

  (call-with-input-file metadata
    (lambda (port)
      (let loop ((required-deps '())
                 (test-deps '()))
        (let ((line (read-line port)))
          (cond
           ((eof-object? line)
            (map (compose reverse delete-duplicates)
                 (list required-deps test-deps)))
           ((not (requires-dist-header? line))
            (loop required-deps test-deps))       ;skip line
           ((not (extra-name line))
            (loop (cons (specification->requirement-name
                         (requires-dist-value line))
                        required-deps)
                  test-deps))
           ((test-requirement? line)
            (loop required-deps
                  (cons (specification->requirement-name
                         (requires-dist-value line))
                        test-deps)))
           (else
            ;; Requirement of an extra that is not test-related: skip it.
            (loop required-deps test-deps))))))))
c4797121 263
(define (guess-requirements source-url wheel-url archive)
  "Return the upstream requirements of a PyPI package as a list of two lists,
in the format returned by PARSE-WHEEL-METADATA and PARSE-REQUIRES.TXT.  The
wheel at WHEEL-URL, if any, is consulted first; when that yields nothing, the
requirements are read from the 'requires.txt' file found in ARCHIVE, the
source archive corresponding to SOURCE-URL.  Archives are extracted in
temporary directories."

  (define (call-silently thunk)
    ;; Call THUNK with the current output and error ports bound to void
    ;; ports, and return its result.
    (parameterize ((current-error-port (%make-void-port "rw+"))
                   (current-output-port (%make-void-port "rw+")))
      (thunk)))

  (define (read-wheel-metadata wheel-archive)
    ;; Given WHEEL-ARCHIVE, a ZIP Python wheel archive, return the package's
    ;; requirements, or #f if the metadata file contained therein couldn't be
    ;; extracted.
    (let ((metadata (string-append (wheel-url->extracted-directory wheel-url)
                                   "/METADATA")))
      (call-with-temporary-directory
       (lambda (dir)
         (let ((status (call-silently
                        (lambda ()
                          (system* "unzip" wheel-archive "-d" dir
                                   metadata)))))
           (cond
            ((zero? status)
             (parse-wheel-metadata (string-append dir "/" metadata)))
            (else
             (warning
              (G_ "Failed to extract file: ~a from wheel.~%") metadata)
             #f)))))))

  (define (guess-requirements-from-wheel)
    ;; Return the package's requirements using the wheel, or #f if there is
    ;; no wheel or an error occurs.
    (call-with-temporary-output-file
     (lambda (temp port)
       (and wheel-url
            (url-fetch wheel-url temp)
            (read-wheel-metadata temp)))))

  (define (requires.txt? file-name stat)
    ;; Return true if FILE-NAME designates the requires.txt file of an
    ;; egg-info directory.
    (string-match "\\.egg-info/requires.txt$" file-name))

  (define (extract-archive dir)
    ;; Silently unpack ARCHIVE into DIR, choosing the tool according to the
    ;; extension of SOURCE-URL.
    (call-silently
     (lambda ()
       (if (string=? "zip" (file-extension source-url))
           (invoke "unzip" archive "-d" dir)
           (invoke "tar" "xf" archive "-C" dir)))))

  (define (guess-requirements-from-source)
    ;; Return the package's requirements by guessing them from the source.
    (cond
     ((not (compressed-file? source-url))
      (warning (G_ "Unsupported archive format; \
cannot determine package dependencies from source archive: ~a~%")
               (basename source-url))
      (list '() '()))
     (else
      (call-with-temporary-directory
       (lambda (dir)
         (extract-archive dir)
         (let ((candidates (find-files dir requires.txt?)))
           (if (null? candidates)
               (begin
                 (warning (G_ "Cannot guess requirements from source archive:\
 no requires.txt file found.~%"))
                 (list '() '()))
               (parse-requires.txt (first candidates)))))))))

  ;; First, try to compute the requirements using the wheel, else, fallback to
  ;; reading the "requires.txt" from the egg-info directory from the source
  ;; archive.
  (or (guess-requirements-from-wheel)
      (guess-requirements-from-source)))
328
(define (compute-inputs source-url wheel-url archive)
  "Given the SOURCE-URL and WHEEL-URL of an already downloaded ARCHIVE, return
two values: a list of lists of name/variable pairs, one list per group of
requirements returned by GUESS-REQUIREMENTS, and the unaltered, flattened
list of upstream dependency names."

  (define (argparse? name)
    ;; 'argparse' requirements are dropped from the generated inputs.
    (string=? "argparse" name))

  (define (requirement->input name)
    ;; Return the (NAME ,VARIABLE) input form for the upstream requirement
    ;; NAME.
    (let ((guix-name (python->package-name name)))
      `(,guix-name (,'unquote ,(string->symbol guix-name)))))

  (define (input<? left right)
    ;; Order inputs by their name, ignoring case.
    (string-ci<? (first left) (first right)))

  (define (process-requirements deps)
    (sort (map requirement->input (remove argparse? deps))
          input<?))

  (let ((dependencies (guess-requirements source-url wheel-url archive)))
    (values (map process-requirements dependencies)
            (concatenate dependencies))))
1b3e9685 355
(define (make-pypi-sexp name version source-url wheel-url home-page synopsis
                        description license)
  "Download SOURCE-URL and return two values: the `package' s-expression for
a python package with the given NAME, VERSION, SOURCE-URL, HOME-PAGE,
SYNOPSIS, DESCRIPTION, and LICENSE, and the list of upstream dependency names
computed from the source archive and WHEEL-URL.  Return #f if the download
fails."

  (define (source-uri)
    ;; PyPI URL are case sensitive, but sometimes a project named using
    ;; mixed case has a URL using lower case, so we must work around this
    ;; inconsistency.  For actual examples, compare the URLs of the
    ;; "Deprecated" and "uWSGI" PyPI packages.
    (let ((uri-name (if (string-contains source-url name)
                        name
                        (string-downcase name))))
      `(pypi-uri ,uri-name version)))

  (call-with-temporary-output-file
   (lambda (temp port)
     (and (url-fetch source-url temp)
          (let-values (((guix-dependencies upstream-dependencies)
                        (compute-inputs source-url wheel-url temp)))
            (match-let (((required-inputs test-inputs) guix-dependencies))
              (values
               `(package
                  (name ,(python->package-name name))
                  (version ,version)
                  (source
                   (origin
                     (method url-fetch)
                     (uri ,(source-uri))
                     (sha256
                      (base32
                       ,(guix-hash-url temp)))))
                  (build-system python-build-system)
                  ,@(maybe-inputs required-inputs 'propagated-inputs)
                  ,@(maybe-inputs test-inputs 'native-inputs)
                  (home-page ,home-page)
                  (synopsis ,synopsis)
                  (description ,description)
                  (license ,(license->symbol license)))
               upstream-dependencies)))))))
1b3e9685 393
ac906cb7
RW
(define pypi->guix-package
  (memoize
   (lambda* (package-name)
     "Fetch the metadata for PACKAGE-NAME from pypi.org, and return the
`package' s-expression corresponding to that package, or #f on failure."
     (let ((package (pypi-fetch package-name)))
       (if (not package)
           #f
           (guard (c ((missing-source-error? c)
                      ;; Report the package carried by the condition and
                      ;; leave.
                      (let ((sourceless (missing-source-error-package c)))
                        (leave
                         (G_ "no source release for pypi package ~a ~a~%")
                         (assoc-ref* sourceless "info" "name")
                         (assoc-ref* sourceless "info" "version")))))
             (let* ((info (lambda (field)
                            (assoc-ref* package "info" field)))
                    (source-release (latest-source-release package))
                    (wheel-release (latest-wheel-release package))
                    ;; The summary serves as both synopsis and description.
                    (summary (info "summary")))
               (make-pypi-sexp (info "name")
                               (info "version")
                               (assoc-ref source-release "url")
                               (assoc-ref wheel-release "url")
                               (info "home_page")
                               summary
                               summary
                               (string->license (info "license"))))))))))
416
(define (pypi-recursive-import package-name)
  "Call 'recursive-import' for PACKAGE-NAME, importing each package with
'pypi->guix-package' and deriving Guix names with 'python->package-name'."
  (define (import-package name repo)
    ;; REPO is ignored.
    (pypi->guix-package name))

  (recursive-import package-name #f
                    #:repo->guix-package import-package
                    #:guix-name python->package-name))
bab020d7 422
263ac57f
DC
(define (string->license str)
  "Return the license object that the string STR designates, or #f when STR
is not one of the recognized license names."
  (define known-licenses
    ;; Association list from license names to license objects.
    `(("GNU LGPL" . ,license:lgpl2.0)
      ("GPL" . ,license:gpl3)
      ("BSD" . ,license:bsd-3)
      ("BSD License" . ,license:bsd-3)
      ("MIT" . ,license:expat)
      ("MIT license" . ,license:expat)
      ("Expat license" . ,license:expat)
      ("Public domain" . ,license:public-domain)
      ("Apache License, Version 2.0" . ,license:asl2.0)
      ("Apache 2.0" . ,license:asl2.0)))

  (assoc-ref known-licenses str))
433
bab020d7
CR
(define (pypi-package? package)
  "Return true if PACKAGE is a Python package from PyPI, that is, if its
source is fetched with 'url-fetch' from a PyPI URL."

  (define %pypi-url-prefixes
    ;; Note that the last prefix is already covered by the first one.
    '("https://pypi.org/"
      "https://pypi.python.org/"
      "https://pypi.org/packages"))

  (define (pypi-url? url)
    (any (cut string-prefix? <> url) %pypi-url-prefixes))

  (let* ((source (package-source package))
         (uri (and=> source origin-uri))
         (fetch-method (and=> source origin-method)))
    (and (eq? fetch-method download:url-fetch)
         (match uri
           ((? string?)
            (pypi-url? uri))
           ((uris ...)
            (any pypi-url? uris))))))
450
7d27a025
LC
(define (latest-release package)
  "Return an <upstream-source> for the latest release of PACKAGE, or #f when
the PyPI metadata cannot be fetched or the latest release has no source
distribution."
  (let ((metadata (pypi-fetch (guix-package->pypi-name package))))
    (and metadata
         (guard (c ((missing-source-error? c) #f))
           (let ((latest-version (assoc-ref* metadata "info" "version"))
                 (source-url (assoc-ref (latest-source-release metadata)
                                        "url")))
             (upstream-source
              (package (package-name package))
              (version latest-version)
              (urls (list source-url))))))))
bab020d7
CR
464
;; Updater for packages recognized by 'pypi-package?'; the latest release is
;; looked up with 'latest-release'.
(define %pypi-updater
  (upstream-updater
   (name 'pypi)
   (description "Updater for PyPI packages")
   (pred pypi-package?)
   (latest latest-release)))