gnu: gspell: Build with gobject-introspection.
[jackhill/guix/guix.git] / guix / cve.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2015, 2016, 2017, 2018, 2019 Ludovic Courtès <ludo@gnu.org>
3 ;;;
4 ;;; This file is part of GNU Guix.
5 ;;;
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
10 ;;;
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
15 ;;;
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
18
19 (define-module (guix cve)
20 #:use-module (guix utils)
21 #:use-module (guix http-client)
22 #:use-module (guix json)
23 #:use-module (guix i18n)
24 #:use-module (json)
25 #:use-module (web uri)
26 #:use-module (srfi srfi-1)
27 #:use-module (srfi srfi-9)
28 #:use-module (srfi srfi-11)
29 #:use-module (srfi srfi-19)
30 #:use-module (srfi srfi-26)
31 #:use-module (srfi srfi-34)
32 #:use-module (srfi srfi-35)
33 #:use-module (ice-9 match)
34 #:use-module (ice-9 regex)
35 #:use-module (ice-9 vlist)
36 #:export (json->cve-items
37
38 cve-item?
39 cve-item-cve
40 cve-item-configurations
41 cve-item-published-date
42 cve-item-last-modified-date
43
44 cve?
45 cve-id
46 cve-data-type
47 cve-data-format
48 cvs-references
49
50 cve-reference?
51 cve-reference-url
52 cve-reference-tags
53
54 vulnerability?
55 vulnerability-id
56 vulnerability-packages
57
58 json->vulnerabilities
59 current-vulnerabilities
60 vulnerabilities->lookup-proc))
61
62 ;;; Commentary:
63 ;;;
64 ;;; This modules provides the tools to fetch, parse, and digest part of the
65 ;;; Common Vulnerabilities and Exposures (CVE) feeds provided by the US NIST
66 ;;; at <https://nvd.nist.gov/vuln/data-feeds>.
67 ;;;
68 ;;; Code:
69
70 (define (string->date* str)
71 (string->date str "~Y-~m-~dT~H:~M~z"))
72
73 (define-json-mapping <cve-item> cve-item cve-item?
74 json->cve-item
75 (cve cve-item-cve "cve" json->cve) ;<cve>
76 (configurations cve-item-configurations ;list of sexps
77 "configurations" configuration-data->cve-configurations)
78 (published-date cve-item-published-date
79 "publishedDate" string->date*)
80 (last-modified-date cve-item-last-modified-date
81 "lastModifiedDate" string->date*))
82
83 (define-json-mapping <cve> cve cve?
84 json->cve
85 (id cve-id "CVE_data_meta" ;string
86 (cut assoc-ref <> "ID"))
87 (data-type cve-data-type ;'CVE
88 "data_type" string->symbol)
89 (data-format cve-data-format ;'MITRE
90 "data_format" string->symbol)
91 (references cve-item-references ;list of <cve-reference>
92 "references" reference-data->cve-references))
93
94 (define-json-mapping <cve-reference> cve-reference cve-reference?
95 json->cve-reference
96 (url cve-reference-url) ;string
97 (tags cve-reference-tags ;list of strings
98 "tags" vector->list))
99
100 (define (reference-data->cve-references alist)
101 (map json->cve-reference
102 (vector->list (assoc-ref alist "reference_data"))))
103
104 (define %cpe-package-rx
105 ;; For applications: "cpe:2.3:a:VENDOR:PACKAGE:VERSION", or sometimes
106 ;; "cpe:2.3:a:VENDOR:PACKAGE:VERSION:PATCH-LEVEL".
107 (make-regexp "^cpe:2\\.3:a:([^:]+):([^:]+):([^:]+):([^:]+):"))
108
109 (define (cpe->package-name cpe)
110 "Converts the Common Platform Enumeration (CPE) string CPE to a package
111 name, in a very naive way. Return two values: the package name, and its
112 version string. Return #f and #f if CPE does not look like an application CPE
113 string."
114 (cond ((regexp-exec %cpe-package-rx cpe)
115 =>
116 (lambda (matches)
117 (values (match:substring matches 2)
118 (match (match:substring matches 3)
119 ("*" '_)
120 (version
121 (string-append version
122 (match (match:substring matches 4)
123 ("" "")
124 (patch-level
125 ;; Drop the colon from things like
126 ;; "cpe:2.3:a:openbsd:openssh:6.8:p1".
127 (string-drop patch-level 1)))))))))
128 (else
129 (values #f #f))))
130
131 (define (cpe-match->cve-configuration alist)
132 "Convert ALIST, a \"cpe_match\" alist, into an sexp representing the package
133 and versions matched. Return #f if ALIST doesn't correspond to an application
134 package."
135 (let ((cpe (assoc-ref alist "cpe23Uri"))
136 (starti (assoc-ref alist "versionStartIncluding"))
137 (starte (assoc-ref alist "versionStartExcluding"))
138 (endi (assoc-ref alist "versionEndIncluding"))
139 (ende (assoc-ref alist "versionEndExcluding")))
140 (let-values (((package version) (cpe->package-name cpe)))
141 (and package
142 `(,package
143 ,(cond ((and (or starti starte) (or endi ende))
144 `(and ,(if starti `(>= ,starti) `(> ,starte))
145 ,(if endi `(<= ,endi) `(< ,ende))))
146 (starti `(>= ,starti))
147 (starte `(> ,starte))
148 (endi `(<= ,endi))
149 (ende `(< ,ende))
150 (else version)))))))
151
152 (define (configuration-data->cve-configurations alist)
153 "Given ALIST, a JSON dictionary for the baroque \"configurations\"
154 element found in CVEs, return an sexp such as (\"binutils\" (<
155 \"2.31\")) that represents matching configurations."
156 (define string->operator
157 (match-lambda
158 ("OR" 'or)
159 ("AND" 'and)))
160
161 (define (node->configuration node)
162 (let ((operator (string->operator (assoc-ref node "operator"))))
163 (cond
164 ((assoc-ref node "cpe_match")
165 =>
166 (lambda (matches)
167 (let ((matches (vector->list matches)))
168 (match (filter-map cpe-match->cve-configuration
169 matches)
170 (() #f)
171 ((one) one)
172 (lst (cons operator lst))))))
173 ((assoc-ref node "children") ;typically for 'and'
174 =>
175 (lambda (children)
176 (match (filter-map node->configuration (vector->list children))
177 (() #f)
178 ((one) one)
179 (lst (cons operator lst)))))
180 (else
181 #f))))
182
183 (let ((nodes (vector->list (assoc-ref alist "nodes"))))
184 (filter-map node->configuration nodes)))
185
186 (define (json->cve-items json)
187 "Parse JSON, an input port or a string, and return a list of <cve-item>
188 records."
189 (let* ((alist (json->scm json))
190 (type (assoc-ref alist "CVE_data_type"))
191 (format (assoc-ref alist "CVE_data_format"))
192 (version (assoc-ref alist "CVE_data_version")))
193 (unless (equal? type "CVE")
194 (raise (condition (&message
195 (message "invalid CVE feed")))))
196 (unless (equal? format "MITRE")
197 (raise (condition
198 (&message
199 (message (format #f (G_ "unsupported CVE format: '~a'")
200 format))))))
201 (unless (equal? version "4.0")
202 (raise (condition
203 (&message
204 (message (format #f (G_ "unsupported CVE data version: '~a'")
205 version))))))
206
207 (map json->cve-item
208 (vector->list (assoc-ref alist "CVE_Items")))))
209
210 (define (version-matches? version sexp)
211 "Return true if VERSION, a string, matches SEXP."
212 (match sexp
213 ('_
214 #t)
215 ((? string? expected)
216 (version-prefix? expected version))
217 (('or sexps ...)
218 (any (cut version-matches? version <>) sexps))
219 (('and sexps ...)
220 (every (cut version-matches? version <>) sexps))
221 (('< max)
222 (version>? max version))
223 (('<= max)
224 (version>=? max version))
225 (('> min)
226 (version>? version min))
227 (('>= min)
228 (version>=? version min))))
229
230 \f
231 ;;;
232 ;;; High-level interface.
233 ;;;
234
235 (define %now
236 (current-date))
237 (define %current-year
238 (date-year %now))
239 (define %past-year
240 (- %current-year 1))
241
242 (define (yearly-feed-uri year)
243 "Return the URI for the CVE feed for YEAR."
244 (string->uri
245 (string-append "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-"
246 (number->string year) ".json.gz")))
247
248 (define %current-year-ttl
249 ;; According to <https://nvd.nist.gov/download.cfm#CVE_FEED>, feeds are
250 ;; updated "approximately every two hours."
251 (* 60 30))
252
253 (define %past-year-ttl
254 ;; Update the previous year's database more and more infrequently.
255 (* 3600 24 (date-month %now)))
256
257 (define-record-type <vulnerability>
258 (vulnerability id packages)
259 vulnerability?
260 (id vulnerability-id) ;string
261 (packages vulnerability-packages)) ;((p1 sexp1) (p2 sexp2) ...)
262
263 (define vulnerability->sexp
264 (match-lambda
265 (($ <vulnerability> id packages)
266 `(v ,id ,packages))))
267
268 (define sexp->vulnerability
269 (match-lambda
270 (('v id (packages ...))
271 (vulnerability id packages))))
272
273 (define (cve-configuration->package-list config)
274 "Parse CONFIG, a config sexp, and return a list of the form (P SEXP)
275 where P is a package name and SEXP expresses constraints on the matching
276 versions."
277 (let loop ((config config)
278 (packages '()))
279 (match config
280 (('or configs ...)
281 (fold loop packages configs))
282 (('and config _ ...) ;XXX
283 (loop config packages))
284 (((? string? package) '_) ;any version
285 (cons `(,package _)
286 (alist-delete package packages)))
287 (((? string? package) sexp)
288 (let ((previous (assoc-ref packages package)))
289 (if previous
290 (cons `(,package (or ,sexp ,@previous))
291 (alist-delete package packages))
292 (cons `(,package ,sexp) packages)))))))
293
294 (define (merge-package-lists lst)
295 "Merge the list in LST, each of which has the form (p sexp), where P
296 is the name of a package and SEXP is an sexp that constrains matching
297 versions."
298 (fold (lambda (plist result) ;XXX: quadratic
299 (fold (match-lambda*
300 (((package version) result)
301 (match (assoc-ref result package)
302 (#f
303 (cons `(,package ,version) result))
304 ((previous)
305 (cons `(,package (or ,version ,previous))
306 (alist-delete package result))))))
307 result
308 plist))
309 '()
310 lst))
311
312 (define (cve-item->vulnerability item)
313 "Return a <vulnerability> corresponding to ITEM, a <cve-item> record;
314 return #f if ITEM does not list any configuration or if it does not list
315 any \"a\" (application) configuration."
316 (let ((id (cve-id (cve-item-cve item))))
317 (match (cve-item-configurations item)
318 (() ;no configurations
319 #f)
320 ((configs ...)
321 (vulnerability id
322 (merge-package-lists
323 (map cve-configuration->package-list configs)))))))
324
325 (define (json->vulnerabilities json)
326 "Parse JSON, an input port or a string, and return the list of
327 vulnerabilities found therein."
328 (filter-map cve-item->vulnerability (json->cve-items json)))
329
330 (define (write-cache input cache)
331 "Read vulnerabilities as gzipped JSON from INPUT, and write it as a compact
332 sexp to CACHE."
333 (call-with-decompressed-port 'gzip input
334 (lambda (input)
335 (define vulns
336 (json->vulnerabilities input))
337
338 (write `(vulnerabilities
339 1 ;format version
340 ,(map vulnerability->sexp vulns))
341 cache))))
342
343 (define (fetch-vulnerabilities year ttl)
344 "Return the list of <vulnerability> for YEAR, assuming the on-disk cache has
345 the given TTL (fetch from the NIST web site when TTL has expired)."
346 (define (cache-miss uri)
347 (format (current-error-port) "fetching CVE database for ~a...~%" year))
348
349 (define (read* port)
350 ;; Disable read options to avoid populating the source property weak
351 ;; table, which speeds things up, saves memory, and works around
352 ;; <https://lists.gnu.org/archive/html/guile-devel/2017-09/msg00031.html>.
353 (let ((options (read-options)))
354 (dynamic-wind
355 (lambda ()
356 (read-disable 'positions))
357 (lambda ()
358 (read port))
359 (lambda ()
360 (read-options options)))))
361
362 ;; Note: We used to keep the original JSON files in cache but parsing it
363 ;; would take typically ~15s for a year of data. Thus, we instead store a
364 ;; summarized version thereof as an sexp, which can be parsed in 1s or so.
365 (let* ((port (http-fetch/cached (yearly-feed-uri year)
366 #:ttl ttl
367 #:write-cache write-cache
368 #:cache-miss cache-miss))
369 (sexp (read* port)))
370 (close-port port)
371 (match sexp
372 (('vulnerabilities 1 vulns)
373 (map sexp->vulnerability vulns)))))
374
375 (define (current-vulnerabilities)
376 "Return the current list of Common Vulnerabilities and Exposures (CVE) as
377 published by the US NIST."
378 (let ((past-years (unfold (cut > <> 3)
379 (lambda (n)
380 (- %current-year n))
381 1+
382 1))
383 (past-ttls (unfold (cut > <> 3)
384 (lambda (n)
385 (* n %past-year-ttl))
386 1+
387 1)))
388 (append-map fetch-vulnerabilities
389 (cons %current-year past-years)
390 (cons %current-year-ttl past-ttls))))
391
392 (define (vulnerabilities->lookup-proc vulnerabilities)
393 "Return a lookup procedure built from VULNERABILITIES that takes a package
394 name and optionally a version number. When the version is omitted, the lookup
395 procedure returns a list of vulnerabilities; otherwise, it returns a list of
396 vulnerabilities affecting the given package version."
397 (define table
398 ;; Map package names to lists of version/vulnerability pairs.
399 (fold (lambda (vuln table)
400 (match vuln
401 (($ <vulnerability> id packages)
402 (fold (lambda (package table)
403 (match package
404 ((name . versions)
405 (vhash-cons name (cons vuln versions)
406 table))))
407 table
408 packages))))
409 vlist-null
410 vulnerabilities))
411
412 (lambda* (package #:optional version)
413 (vhash-fold* (if version
414 (lambda (pair result)
415 (match pair
416 ((vuln sexp)
417 (if (version-matches? version sexp)
418 (cons vuln result)
419 result))))
420 (lambda (pair result)
421 (match pair
422 ((vuln . _)
423 (cons vuln result)))))
424 '()
425 package table)))
426
427
428 ;;; cve.scm ends here