import: Add 'cran' importer.
[jackhill/guix/guix.git] / guix / import / cran.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2015 Ricardo Wurmus <rekado@elephly.net>
3 ;;;
4 ;;; This file is part of GNU Guix.
5 ;;;
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
10 ;;;
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19 (define-module (guix import cran)
20 #:use-module (ice-9 match)
21 #:use-module (ice-9 regex)
22 #:use-module (srfi srfi-1)
23 #:use-module (sxml simple)
24 #:use-module (sxml match)
25 #:use-module (sxml xpath)
26 #:use-module (guix http-client)
27 #:use-module (guix hash)
28 #:use-module (guix store)
29 #:use-module (guix base32)
30 #:use-module ((guix download) #:select (download-to-store))
31 #:use-module (guix import utils)
32 #:export (cran->guix-package))
33
34 ;;; Commentary:
35 ;;;
36 ;;; Generate a package declaration template for the latest version of an R
37 ;;; package on CRAN, using the HTML description downloaded from
38 ;;; cran.r-project.org.
39 ;;;
40 ;;; Code:
41
;; Convert a CRAN license designation into the symbol naming the matching
;; variable of (guix licenses).  The argument is either a license string or a
;; list of such strings:
;;
;;   - a known string maps to its license symbol;
;;   - a one-element list is unwrapped and converted like a plain string;
;;   - any other list becomes a `(list LICENSE ...)' expression;
;;   - anything unrecognized yields #f.
(define string->license
  (match-lambda
   ("AGPL-3" 'agpl3+)
   ("Artistic-2.0" 'artistic2.0)
   ("Apache License 2.0" 'asl2.0)
   ("BSD_2_clause" 'bsd-2)
   ("BSD_3_clause" 'bsd-3)
   ("GPL-2" 'gpl2+)
   ("GPL-3" 'gpl3+)
   ("LGPL-2" 'lgpl2.0+)
   ("LGPL-2.1" 'lgpl2.1+)
   ("LGPL-3" 'lgpl3+)
   ;; CRAN's "MIT" template is the Expat license text, which is distinct
   ;; from the X11 license.
   ("MIT" 'expat)
   ((x) (string->license x))
   ((lst ...) `(list ,@(map string->license lst)))
   (_ #f)))
58
(define (format-inputs names)
  "Return the package inputs corresponding to NAMES, a list of package name
strings.  Each input is a two-element list made of the name and an `unquote'
form of the symbol bearing that name; inputs are ordered by name, ignoring
case."
  (let ((sorted (sort names string-ci<?)))
    (map (lambda (input-name)
           (let ((variable (string->symbol input-name)))
             (list input-name (list 'unquote variable))))
         sorted)))
64
(define* (maybe-inputs package-inputs #:optional (type 'inputs))
  "Return a one-element list holding the TYPE field of a package definition
built from the list of names PACKAGE-INPUTS, or the empty list when
PACKAGE-INPUTS is empty.  TYPE defaults to `inputs'."
  (match package-inputs
    (() '())
    ((names ...)
     ;; Wrap the formatted inputs in a `quasiquote' form so that the
     ;; `unquote' forms they contain are meaningful in the generated code.
     (let ((quoted-inputs (list 'quasiquote (format-inputs names))))
       (list (list type quoted-inputs))))))
73
(define (table-datum tree label)
  "Return a node set holding the datum cell that follows LABEL in the sxml
table TREE, or the empty node set when there is none.  A cell counts as a
label cell only if it is the first cell of its table row."
  (let* ((parent    (node-parent tree))   ;maps a node to its parent in TREE
         (find-data (sxpath `(xhtml:tr
                              (xhtml:td 1)      ;first cell of each row
                              (equal? ,label)   ;its text must be LABEL
                              ,parent           ;back up to the label cell
                              ,parent           ;back up to the row
                              (xhtml:td 2))))   ;second cell of that row
         (keep-first (node-pos 1)))
    (keep-first (find-data tree))))
85
;; Base URL of the per-package pages on CRAN; the package name is appended
;; to it.
(define %cran-url "http://cran.r-project.org/web/packages/")

(define (cran-fetch name)
  "Return an sxml representation of the CRAN page for the R package NAME,
or #f on failure.  NAME is case-sensitive."
  ;; This API always returns the latest release of the module.
  (let ((cran-url (string-append %cran-url name)))
    ;; Any error, be it while downloading or while parsing, yields #f.
    (false-if-exception
     (let ((page-port (http-fetch cran-url)))
       (dynamic-wind
         (const #t)
         (lambda ()
           (xml->sxml page-port
                      #:trim-whitespace? #t
                      #:namespaces '((xhtml . "http://www.w3.org/1999/xhtml"))
                      #:default-entity-handler
                      ;; Map the HTML entities found on CRAN pages to plain
                      ;; text; warn about any other entity and fall back to
                      ;; its name.
                      (lambda (port name)
                        (case name
                          ((nbsp) " ")
                          ((ge) ">=")
                          ((gt) ">")
                          ((lt) "<")
                          (else
                           (format (current-warning-port)
                                   "~a:~a:~a: undefined entity: ~a\n"
                                   cran-url (port-line port) (port-column port)
                                   name)
                           (symbol->string name))))))
         (lambda ()
           ;; Release the connection whether or not parsing succeeded.
           (close-port page-port)))))))
110
(define (cran-sxml->sexp sxml)
  "Return the `package' s-expression for a CRAN package from the SXML
representation of the package page.  The package tarball is downloaded to
the store in order to compute its hash."
  (define (nodes->text nodeset)
    ;; Join all the text nodes found below NODESET with single spaces.
    (string-join ((sxpath '(// *text*)) nodeset) " "))

  (define (guix-name name)
    ;; Return NAME in lower case, with an "r-" prefix unless NAME already
    ;; starts with it.
    (if (string-prefix? "r-" name)
        (string-downcase name)
        (string-append "r-" (string-downcase name))))

  (sxml-match-let*
      (((*TOP* (xhtml:html
                ,head
                (xhtml:body
                 (xhtml:h2 ,name-and-synopsis)
                 (xhtml:p ,description)
                 ,summary
                 (xhtml:h4 "Downloads:") ,downloads
                 . ,rest)))
        sxml))
    (let* (;; The page heading reads "NAME: SYNOPSIS"; split it once.
           (heading    (string-match ": " name-and-synopsis))
           (name       (match:prefix heading))
           (synopsis   (match:suffix heading))
           (version    (nodes->text (table-datum summary "Version:")))
           (license    ((compose string->license nodes->text)
                        (table-datum summary "License:")))
           (home-page  (nodes->text ((sxpath '((xhtml:a 1)))
                                     (table-datum summary "URL:"))))
           (source-url (string-append "mirror://cran/"
                                      ;; Remove double dots, because we want an
                                      ;; absolute path.
                                      (regexp-substitute/global
                                       #f "\\.\\./"
                                       (string-join
                                        ((sxpath '((xhtml:a 1) @ href *text*))
                                         (table-datum downloads
                                                      " Package source: ")))
                                       'pre 'post)))
           (tarball    (with-store store (download-to-store store source-url)))
           (sysdepends (map match:substring
                            (list-matches
                             "[^ ]+"
                             ;; Strip off comma and parenthetical
                             ;; expressions.
                             (regexp-substitute/global
                              #f "(,|\\([^\\)]+\\))"
                              (nodes->text (table-datum summary
                                                        "SystemRequirements:"))
                              'pre 'post))))
           (imports    (map guix-name
                            ((sxpath '(// xhtml:a *text*))
                             (table-datum summary "Imports:")))))
      `(package
         (name ,(guix-name name))
         (version ,version)
         (source (origin
                   (method url-fetch)
                   (uri (string-append ,@(factorize-uri source-url version)))
                   (sha256
                    (base32
                     ,(bytevector->nix-base32-string (file-sha256 tarball))))))
         (build-system r-build-system)
         ,@(maybe-inputs sysdepends)
         ,@(maybe-inputs imports 'propagated-inputs)
         ;; Fall back to the CRAN page when the package lists no URL.
         (home-page ,(if (string-null? home-page)
                         (string-append %cran-url name)
                         home-page))
         (synopsis ,synopsis)
         ;; Use double spacing between sentences: a period and a single
         ;; space followed by a word become a period and two spaces.
         (description ,(regexp-substitute/global #f "\\. \\b" description
                                                 'pre ".  " 'post))
         (license ,license)))))
183
(define (cran->guix-package package-name)
  "Download the CRAN page describing PACKAGE-NAME from cran.r-project.org and
return the matching `package' s-expression.  Return #f when the page could
not be fetched."
  (let ((page (cran-fetch package-name)))
    (and page
         (cran-sxml->sexp page))))