Commit | Line | Data |
---|---|---|
49e6291a | 1 | ;;; GNU Guix --- Functional package management for GNU |
ba97e454 | 2 | ;;; Copyright © 2014, 2015, 2016, 2017 Ludovic Courtès <ludo@gnu.org> |
49e6291a LC |
3 | ;;; |
4 | ;;; This file is part of GNU Guix. | |
5 | ;;; | |
6 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
7 | ;;; under the terms of the GNU General Public License as published by | |
8 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | ;;; your option) any later version. | |
10 | ;;; | |
11 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;;; GNU General Public License for more details. | |
15 | ;;; | |
16 | ;;; You should have received a copy of the GNU General Public License | |
17 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
18 | ||
19 | (define-module (guix scripts offload) | |
21531add LC |
20 | #:use-module (ssh key) |
21 | #:use-module (ssh auth) | |
22 | #:use-module (ssh session) | |
23 | #:use-module (ssh channel) | |
cf283dd9 LC |
24 | #:use-module (ssh popen) |
25 | #:use-module (ssh dist) | |
26 | #:use-module (ssh dist node) | |
0b724753 | 27 | #:use-module (ssh version) |
49e6291a LC |
28 | #:use-module (guix config) |
29 | #:use-module (guix records) | |
987a29ba | 30 | #:use-module (guix ssh) |
49e6291a LC |
31 | #:use-module (guix store) |
32 | #:use-module (guix derivations) | |
2535635f LC |
33 | #:use-module ((guix serialization) |
34 | #:select (nar-error? nar-error-file)) | |
49e6291a LC |
35 | #:use-module (guix nar) |
36 | #:use-module (guix utils) | |
0a40626f | 37 | #:use-module ((guix build syscalls) #:select (fcntl-flock)) |
f326fef8 | 38 | #:use-module ((guix build utils) #:select (which mkdir-p)) |
49e6291a LC |
39 | #:use-module (guix ui) |
40 | #:use-module (srfi srfi-1) | |
6c41cce0 | 41 | #:use-module (srfi srfi-11) |
49e6291a LC |
42 | #:use-module (srfi srfi-26) |
43 | #:use-module (srfi srfi-34) | |
44 | #:use-module (srfi srfi-35) | |
45 | #:use-module (ice-9 popen) | |
46 | #:use-module (ice-9 rdelim) | |
47 | #:use-module (ice-9 match) | |
48 | #:use-module (ice-9 regex) | |
49 | #:use-module (ice-9 format) | |
2535635f | 50 | #:use-module (ice-9 binary-ports) |
49e6291a LC |
51 | #:export (build-machine |
52 | build-requirements | |
53 | guix-offload)) | |
54 | ||
55 | ;;; Commentary: | |
56 | ;;; | |
57 | ;;; Attempt to offload builds to the machines listed in | |
58 | ;;; /etc/guix/machines.scm, transferring missing dependencies over SSH, and | |
59 | ;;; retrieving the build output(s) over SSH upon success. | |
60 | ;;; | |
61 | ;;; This command should not be used directly; instead, it is called on-demand | |
62 | ;;; by the daemon, unless it was started with '--no-build-hook' or a client | |
63 | ;;; inhibited build hooks. | |
64 | ;;; | |
65 | ;;; Code: | |
66 | ||
67 | ||
68 | (define-record-type* <build-machine> | |
69 | build-machine make-build-machine | |
70 | build-machine? | |
71 | (name build-machine-name) ; string | |
cecd72d5 LC |
72 | (port build-machine-port ; number |
73 | (default 22)) | |
49e6291a LC |
74 | (system build-machine-system) ; string |
75 | (user build-machine-user) ; string | |
76 | (private-key build-machine-private-key ; file name | |
21531add LC |
77 | (default (user-openssh-private-key))) |
78 | (host-key build-machine-host-key) ; string | |
1d48cf94 LC |
79 | (compression build-machine-compression ; string |
80 | (default "zlib@openssh.com,zlib")) | |
81 | (compression-level build-machine-compression-level ;integer | |
82 | (default 3)) | |
cf283dd9 LC |
83 | (daemon-socket build-machine-daemon-socket ; string |
84 | (default "/var/guix/daemon-socket/socket")) | |
49e6291a LC |
85 | (parallel-builds build-machine-parallel-builds ; number |
86 | (default 1)) | |
87 | (speed build-machine-speed ; inexact real | |
88 | (default 1.0)) | |
89 | (features build-machine-features ; list of strings | |
90 | (default '()))) | |
91 | ||
92 | (define-record-type* <build-requirements> | |
93 | build-requirements make-build-requirements | |
94 | build-requirements? | |
95 | (system build-requirements-system) ; string | |
96 | (features build-requirements-features ; list of strings | |
97 | (default '()))) | |
98 | ||
99 | (define %machine-file | |
100 | ;; File that lists machines available as build slaves. | |
101 | (string-append %config-directory "/machines.scm")) | |
102 | ||
21531add LC |
103 | (define (user-openssh-private-key) |
104 | "Return the user's default SSH private key, or #f if it could not be | |
49e6291a LC |
105 | determined." |
106 | (and=> (getenv "HOME") | |
21531add | 107 | (cut string-append <> "/.ssh/id_rsa"))) |
49e6291a LC |
108 | |
109 | (define %user-module | |
110 | ;; Module in which the machine description file is loaded. | |
111 | (let ((module (make-fresh-user-module))) | |
112 | (module-use! module (resolve-interface '(guix scripts offload))) | |
113 | module)) | |
114 | ||
115 | (define* (build-machines #:optional (file %machine-file)) | |
116 | "Read the list of build machines from FILE and return it." | |
117 | (catch #t | |
118 | (lambda () | |
119 | ;; Avoid ABI incompatibility with the <build-machine> record. | |
120 | (set! %fresh-auto-compile #t) | |
121 | ||
122 | (save-module-excursion | |
123 | (lambda () | |
124 | (set-current-module %user-module) | |
92cb2e28 | 125 | (primitive-load file)))) |
49e6291a LC |
126 | (lambda args |
127 | (match args | |
e465d9e1 | 128 | (('system-error . rest) |
49e6291a LC |
129 | (let ((err (system-error-errno args))) |
130 | ;; Silently ignore missing file since this is a common case. | |
131 | (if (= ENOENT err) | |
132 | '() | |
69daee23 | 133 | (leave (G_ "failed to open machine file '~a': ~a~%") |
92cb2e28 | 134 | file (strerror err))))) |
c1202fb1 LC |
135 | (('syntax-error proc message properties form . rest) |
136 | (let ((loc (source-properties->location properties))) | |
69daee23 | 137 | (leave (G_ "~a: ~a~%") |
c1202fb1 | 138 | (location->string loc) message))) |
e465d9e1 | 139 | (x |
69daee23 | 140 | (leave (G_ "failed to load machine file '~a': ~s~%") |
92cb2e28 | 141 | file args)))))) |
49e6291a | 142 | |
21531add LC |
143 | (define (host-key->type+key host-key) |
144 | "Destructure HOST-KEY, an OpenSSH host key string, and return two values: | |
145 | its key type as a symbol, and the actual base64-encoded string." | |
146 | (define (type->symbol type) | |
147 | (and (string-prefix? "ssh-" type) | |
148 | (string->symbol (string-drop type 4)))) | |
149 | ||
150 | (match (string-tokenize host-key) | |
ba97e454 | 151 | ((type key x) |
21531add LC |
152 | (values (type->symbol type) key)) |
153 | ((type key) | |
154 | (values (type->symbol type) key)))) | |
155 | ||
156 | (define (private-key-from-file* file) | |
157 | "Like 'private-key-from-file', but raise an error that 'with-error-handling' | |
158 | can interpret meaningfully." | |
159 | (catch 'guile-ssh-error | |
160 | (lambda () | |
161 | (private-key-from-file file)) | |
162 | (lambda (key proc str . rest) | |
163 | (raise (condition | |
69daee23 | 164 | (&message (message (format #f (G_ "failed to load SSH \ |
21531add LC |
165 | private key from '~a': ~a") |
166 | file str)))))))) | |
167 | ||
168 | (define (open-ssh-session machine) | |
169 | "Open an SSH session for MACHINE and return it. Throw an error on failure." | |
170 | (let ((private (private-key-from-file* (build-machine-private-key machine))) | |
171 | (public (public-key-from-file | |
172 | (string-append (build-machine-private-key machine) | |
173 | ".pub"))) | |
174 | (session (make-session #:user (build-machine-user machine) | |
175 | #:host (build-machine-name machine) | |
176 | #:port (build-machine-port machine) | |
8d125cfc | 177 | #:timeout 10 ;seconds |
21531add LC |
178 | ;; #:log-verbosity 'protocol |
179 | #:identity (build-machine-private-key machine) | |
180 | ||
bd834577 LC |
181 | ;; By default libssh reads ~/.ssh/known_hosts |
182 | ;; and uses that to adjust its choice of cipher | |
183 | ;; suites, which changes the type of host key | |
184 | ;; that the server sends (RSA vs. Ed25519, | |
185 | ;; etc.). Opt for something reproducible and | |
186 | ;; stateless instead. | |
187 | #:knownhosts "/dev/null" | |
188 | ||
21531add LC |
189 | ;; We need lightweight compression when |
190 | ;; exchanging full archives. | |
1d48cf94 LC |
191 | #:compression |
192 | (build-machine-compression machine) | |
193 | #:compression-level | |
194 | (build-machine-compression-level machine)))) | |
74afca5d LC |
195 | (match (connect! session) |
196 | ('ok | |
197 | ;; Authenticate the server. XXX: Guile-SSH 0.10.1 doesn't know about | |
198 | ;; ed25519 keys and 'get-key-type' returns #f in that case. | |
199 | (let-values (((server) (get-server-public-key session)) | |
200 | ((type key) (host-key->type+key | |
201 | (build-machine-host-key machine)))) | |
202 | (unless (and (or (not (get-key-type server)) | |
203 | (eq? (get-key-type server) type)) | |
204 | (string=? (public-key->string server) key)) | |
205 | ;; Key mismatch: something's wrong. XXX: It could be that the server | |
206 | ;; provided its Ed25519 key when we where expecting its RSA key. | |
69daee23 | 207 | (leave (G_ "server at '~a' returned host key '~a' of type '~a' \ |
21531add | 208 | instead of '~a' of type '~a'~%") |
74afca5d LC |
209 | (build-machine-name machine) |
210 | (public-key->string server) (get-key-type server) | |
211 | key type))) | |
212 | ||
213 | (let ((auth (userauth-public-key! session private))) | |
214 | (unless (eq? 'success auth) | |
215 | (disconnect! session) | |
69daee23 | 216 | (leave (G_ "SSH public key authentication failed for '~a': ~a~%") |
74afca5d LC |
217 | (build-machine-name machine) (get-error session)))) |
218 | ||
219 | session) | |
220 | (x | |
221 | ;; Connection failed or timeout expired. | |
69daee23 | 222 | (leave (G_ "failed to connect to '~a': ~a~%") |
74afca5d | 223 | (build-machine-name machine) (get-error session)))))) |
21531add | 224 | |
59f704df LC |
225 | \f |
226 | ;;; | |
227 | ;;; Synchronization. | |
228 | ;;; | |
229 | ||
230 | (define (lock-file file) | |
231 | "Wait and acquire an exclusive lock on FILE. Return an open port." | |
232 | (mkdir-p (dirname file)) | |
233 | (let ((port (open-file file "w0"))) | |
234 | (fcntl-flock port 'write-lock) | |
235 | port)) | |
236 | ||
237 | (define (unlock-file lock) | |
238 | "Unlock LOCK." | |
239 | (fcntl-flock lock 'unlock) | |
240 | (close-port lock) | |
241 | #t) | |
242 | ||
243 | (define-syntax-rule (with-file-lock file exp ...) | |
244 | "Wait to acquire a lock on FILE and evaluate EXP in that context." | |
245 | (let ((port (lock-file file))) | |
246 | (dynamic-wind | |
247 | (lambda () | |
248 | #t) | |
249 | (lambda () | |
250 | exp ...) | |
251 | (lambda () | |
252 | (unlock-file port))))) | |
253 | ||
254 | (define-syntax-rule (with-machine-lock machine hint exp ...) | |
255 | "Wait to acquire MACHINE's exclusive lock for HINT, and evaluate EXP in that | |
256 | context." | |
257 | (with-file-lock (machine-lock-file machine hint) | |
258 | exp ...)) | |
259 | ||
260 | ||
261 | (define (machine-slot-file machine slot) | |
262 | "Return the file name of MACHINE's file for SLOT." | |
263 | ;; For each machine we have a bunch of files representing each build slot. | |
264 | ;; When choosing a build machine, we attempt to get an exclusive lock on one | |
265 | ;; of these; if we fail, that means all the build slots are already taken. | |
266 | ;; Inspired by Nix's build-remote.pl. | |
267 | (string-append (string-append %state-directory "/offload/" | |
268 | (build-machine-name machine) | |
269 | "/" (number->string slot)))) | |
270 | ||
271 | (define (acquire-build-slot machine) | |
272 | "Attempt to acquire a build slot on MACHINE. Return the port representing | |
273 | the slot, or #f if none is available. | |
274 | ||
275 | This mechanism allows us to set a hard limit on the number of simultaneous | |
276 | connections allowed to MACHINE." | |
277 | (mkdir-p (dirname (machine-slot-file machine 0))) | |
278 | (with-machine-lock machine 'slots | |
279 | (any (lambda (slot) | |
280 | (let ((port (open-file (machine-slot-file machine slot) | |
281 | "w0"))) | |
282 | (catch 'flock-error | |
283 | (lambda () | |
284 | (fcntl-flock port 'write-lock #:wait? #f) | |
285 | ;; Got it! | |
286 | (format (current-error-port) | |
287 | "process ~a acquired build slot '~a'~%" | |
288 | (getpid) (port-filename port)) | |
289 | port) | |
290 | (lambda args | |
291 | ;; PORT is already locked by another process. | |
292 | (close-port port) | |
293 | #f)))) | |
294 | (iota (build-machine-parallel-builds machine))))) | |
295 | ||
296 | (define (release-build-slot slot) | |
297 | "Release SLOT, a build slot as returned as by 'acquire-build-slot'." | |
298 | (close-port slot)) | |
299 | ||
300 | \f | |
301 | ;;; | |
302 | ;;; Offloading. | |
303 | ;;; | |
304 | ||
d81195bf LC |
305 | (define (build-log-port) |
306 | "Return the default port where build logs should be sent. The default is | |
307 | file descriptor 4, which is open by the daemon before running the offload | |
308 | hook." | |
309 | (let ((port (fdopen 4 "w0"))) | |
310 | ;; Make sure file descriptor 4 isn't closed when PORT is GC'd. | |
311 | (set-port-revealed! port 1) | |
312 | port)) | |
313 | ||
88da0b68 LC |
314 | (define* (transfer-and-offload drv machine |
315 | #:key | |
316 | (inputs '()) | |
317 | (outputs '()) | |
318 | (max-silent-time 3600) | |
714084e6 | 319 | build-timeout |
88da0b68 LC |
320 | print-build-trace?) |
321 | "Offload DRV to MACHINE. Prior to the actual offloading, transfer all of | |
322 | INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from | |
323 | MACHINE." | |
9e76eed3 LC |
324 | (define session |
325 | (open-ssh-session machine)) | |
326 | ||
cf283dd9 LC |
327 | (define store |
328 | (connect-to-remote-daemon session | |
329 | (build-machine-daemon-socket machine))) | |
330 | ||
331 | (set-build-options store | |
332 | #:print-build-trace print-build-trace? | |
333 | #:max-silent-time max-silent-time | |
334 | #:timeout build-timeout) | |
335 | ||
336 | ;; Protect DRV from garbage collection. | |
337 | (add-temp-root store (derivation-file-name drv)) | |
338 | ||
987a29ba LC |
339 | (with-store local |
340 | (send-files local (cons (derivation-file-name drv) inputs) store | |
341 | #:log-port (current-output-port))) | |
cf283dd9 LC |
342 | (format (current-error-port) "offloading '~a' to '~a'...~%" |
343 | (derivation-file-name drv) (build-machine-name machine)) | |
344 | (format (current-error-port) "@ build-remote ~a ~a~%" | |
345 | (derivation-file-name drv) (build-machine-name machine)) | |
346 | ||
347 | (guard (c ((nix-protocol-error? c) | |
348 | (format (current-error-port) | |
69daee23 | 349 | (G_ "derivation '~a' offloaded to '~a' failed: ~a~%") |
cf283dd9 LC |
350 | (derivation-file-name drv) |
351 | (build-machine-name machine) | |
352 | (nix-protocol-error-message c)) | |
353 | ;; Use exit code 100 for a permanent build failure. The daemon | |
354 | ;; interprets other non-zero codes as transient build failures. | |
355 | (primitive-exit 100))) | |
0237d797 LC |
356 | (parameterize ((current-build-output-port (build-log-port))) |
357 | (build-derivations store (list drv)))) | |
cf283dd9 | 358 | |
987a29ba | 359 | (retrieve-files* outputs store) |
cf283dd9 LC |
360 | (format (current-error-port) "done with offloaded '~a'~%" |
361 | (derivation-file-name drv))) | |
362 | ||
987a29ba LC |
363 | (define (retrieve-files* files remote) |
364 | "Retrieve FILES from REMOTE and import them using 'restore-file-set'." | |
365 | (let-values (((port count) | |
366 | (file-retrieval-port files remote))) | |
e11c42f2 LC |
367 | (format #t (N_ "retrieving ~a store item from '~a'...~%" |
368 | "retrieving ~a store items from '~a'...~%" count) | |
987a29ba | 369 | count (remote-store-host remote)) |
cf283dd9 LC |
370 | |
371 | ;; We cannot use the 'import-paths' RPC here because we already | |
372 | ;; hold the locks for FILES. | |
373 | (let ((result (restore-file-set port | |
374 | #:log-port (current-error-port) | |
375 | #:lock? #f))) | |
376 | (close-port port) | |
377 | result))) | |
49e6291a | 378 | |
59f704df LC |
379 | \f |
380 | ;;; | |
381 | ;;; Scheduling. | |
382 | ;;; | |
383 | ||
49e6291a LC |
384 | (define (machine-matches? machine requirements) |
385 | "Return #t if MACHINE matches REQUIREMENTS." | |
386 | (and (string=? (build-requirements-system requirements) | |
387 | (build-machine-system machine)) | |
388 | (lset<= string=? | |
389 | (build-requirements-features requirements) | |
390 | (build-machine-features machine)))) | |
391 | ||
165f4b2a LC |
392 | (define (machine-load machine) |
393 | "Return the load of MACHINE, divided by the number of parallel builds | |
463fb7d0 | 394 | allowed on MACHINE. Return +∞ if MACHINE is unreachable." |
1cd1d8a7 | 395 | ;; Note: This procedure is costly since it creates a new SSH session. |
463fb7d0 LC |
396 | (match (false-if-exception (open-ssh-session machine)) |
397 | ((? session? session) | |
398 | (let* ((pipe (open-remote-pipe* session OPEN_READ | |
bc1ad4e3 | 399 | "cat" "/proc/loadavg")) |
463fb7d0 LC |
400 | (line (read-line pipe))) |
401 | (close-port pipe) | |
402 | ||
403 | (if (eof-object? line) | |
404 | +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded | |
405 | (match (string-tokenize line) | |
ba97e454 | 406 | ((one five fifteen . x) |
463fb7d0 LC |
407 | (let* ((raw (string->number five)) |
408 | (jobs (build-machine-parallel-builds machine)) | |
409 | (normalized (/ raw jobs))) | |
410 | (format (current-error-port) "load on machine '~a' is ~s\ | |
165f4b2a | 411 | (normalized: ~s)~%" |
463fb7d0 LC |
412 | (build-machine-name machine) raw normalized) |
413 | normalized)) | |
ba97e454 | 414 | (x |
463fb7d0 | 415 | +inf.0))))) ;something's fishy about MACHINE, so avoid it |
ba97e454 | 416 | (x |
463fb7d0 | 417 | +inf.0))) ;failed to connect to MACHINE, so avoid it |
165f4b2a | 418 | |
178f5828 LC |
419 | (define (machine-lock-file machine hint) |
420 | "Return the name of MACHINE's lock file for HINT." | |
f326fef8 | 421 | (string-append %state-directory "/offload/" |
178f5828 LC |
422 | (build-machine-name machine) |
423 | "." (symbol->string hint) ".lock")) | |
f326fef8 | 424 | |
d652b851 LC |
425 | (define (machine-choice-lock-file) |
426 | "Return the name of the file used as a lock when choosing a build machine." | |
427 | (string-append %state-directory "/offload/machine-choice.lock")) | |
428 | ||
d652b851 | 429 | |
5cc569dc LC |
430 | (define %slots |
431 | ;; List of acquired build slots (open ports). | |
432 | '()) | |
433 | ||
88da0b68 LC |
434 | (define (choose-build-machine machines) |
435 | "Return the best machine among MACHINES, or #f." | |
d652b851 LC |
436 | |
437 | ;; Proceed like this: | |
438 | ;; 1. Acquire the global machine-choice lock. | |
439 | ;; 2. For all MACHINES, attempt to acquire a build slot, and filter out | |
440 | ;; those machines for which we failed. | |
441 | ;; 3. Choose the best machine among those that are left. | |
442 | ;; 4. Release the previously-acquired build slots of the other machines. | |
443 | ;; 5. Release the global machine-choice lock. | |
444 | ||
445 | (with-file-lock (machine-choice-lock-file) | |
1cd1d8a7 | 446 | (define machines+slots+loads |
7df3ade1 | 447 | (filter-map (lambda (machine) |
1cd1d8a7 LC |
448 | ;; Call 'machine-load' from here to make sure it is called |
449 | ;; only once per machine (it is expensive). | |
7df3ade1 | 450 | (let ((slot (acquire-build-slot machine))) |
1cd1d8a7 LC |
451 | (and slot |
452 | (list machine slot (machine-load machine))))) | |
7df3ade1 | 453 | machines)) |
d652b851 LC |
454 | |
455 | (define (undecorate pred) | |
1a8ea0a1 LC |
456 | (lambda (a b) |
457 | (match a | |
1cd1d8a7 | 458 | ((machine1 slot1 load1) |
1a8ea0a1 | 459 | (match b |
1cd1d8a7 LC |
460 | ((machine2 slot2 load2) |
461 | (pred machine1 load1 machine2 load2))))))) | |
462 | ||
463 | (define (machine-less-loaded-or-faster? m1 l1 m2 l2) | |
464 | ;; Return #t if M1 is either less loaded or faster than M2, with L1 | |
465 | ;; being the load of M1 and L2 the load of M2. (This relation defines a | |
466 | ;; total order on machines.) | |
467 | (> (/ (build-machine-speed m1) (+ 1 l1)) | |
468 | (/ (build-machine-speed m2) (+ 1 l2)))) | |
469 | ||
470 | (let loop ((machines+slots+loads | |
471 | (sort machines+slots+loads | |
b1fea303 | 472 | (undecorate machine-less-loaded-or-faster?)))) |
1cd1d8a7 LC |
473 | (match machines+slots+loads |
474 | (((best slot load) others ...) | |
d652b851 | 475 | ;; Return the best machine unless it's already overloaded. |
1cd1d8a7 | 476 | (if (< load 2.) |
b1fea303 | 477 | (match others |
1cd1d8a7 | 478 | (((machines slots loads) ...) |
b1fea303 LC |
479 | ;; Release slots from the uninteresting machines. |
480 | (for-each release-build-slot slots) | |
481 | ||
482 | ;; Prevent SLOT from being GC'd. | |
483 | (set! %slots (cons slot %slots)) | |
484 | best)) | |
5cc569dc | 485 | (begin |
b1fea303 | 486 | ;; BEST is overloaded, so try the next one. |
d652b851 | 487 | (release-build-slot slot) |
b1fea303 | 488 | (loop others)))) |
d652b851 | 489 | (() #f))))) |
49e6291a LC |
490 | |
491 | (define* (process-request wants-local? system drv features | |
492 | #:key | |
493 | print-build-trace? (max-silent-time 3600) | |
714084e6 | 494 | build-timeout) |
49e6291a | 495 | "Process a request to build DRV." |
88da0b68 LC |
496 | (let* ((local? (and wants-local? (string=? system (%current-system)))) |
497 | (reqs (build-requirements | |
498 | (system system) | |
499 | (features features))) | |
500 | (candidates (filter (cut machine-matches? <> reqs) | |
501 | (build-machines)))) | |
502 | (match candidates | |
503 | (() | |
504 | ;; We'll never be able to match REQS. | |
505 | (display "# decline\n")) | |
ba97e454 | 506 | ((x ...) |
88da0b68 LC |
507 | (let ((machine (choose-build-machine candidates))) |
508 | (if machine | |
509 | (begin | |
510 | ;; Offload DRV to MACHINE. | |
511 | (display "# accept\n") | |
512 | (let ((inputs (string-tokenize (read-line))) | |
513 | (outputs (string-tokenize (read-line)))) | |
514 | (transfer-and-offload drv machine | |
515 | #:inputs inputs | |
516 | #:outputs outputs | |
517 | #:max-silent-time max-silent-time | |
518 | #:build-timeout build-timeout | |
519 | #:print-build-trace? print-build-trace?))) | |
520 | ||
521 | ;; Not now, all the machines are busy. | |
522 | (display "# postpone\n"))))))) | |
49e6291a | 523 | |
49e6291a | 524 | \f |
aebaee95 LC |
525 | ;;; |
526 | ;;; Installation tests. | |
527 | ;;; | |
528 | ||
529 | (define (assert-node-repl node name) | |
530 | "Bail out if NODE is not running Guile." | |
531 | (match (node-guile-version node) | |
532 | (#f | |
69daee23 | 533 | (leave (G_ "Guile could not be started on '~a'~%") |
aebaee95 LC |
534 | name)) |
535 | ((? string? version) | |
536 | ;; Note: The version string already contains the word "Guile". | |
69daee23 | 537 | (info (G_ "'~a' is running ~a~%") |
aebaee95 LC |
538 | name (node-guile-version node))))) |
539 | ||
540 | (define (assert-node-has-guix node name) | |
541 | "Bail out if NODE lacks the (guix) module, or if its daemon is not running." | |
542 | (match (node-eval node | |
543 | '(begin | |
544 | (use-modules (guix)) | |
545 | (with-store store | |
546 | (add-text-to-store store "test" | |
547 | "Hello, build machine!")))) | |
548 | ((? string? str) | |
69daee23 | 549 | (info (G_ "Guix is usable on '~a' (test returned ~s)~%") |
aebaee95 LC |
550 | name str)) |
551 | (x | |
69daee23 | 552 | (leave (G_ "failed to use Guix module on '~a' (test returned ~s)~%") |
aebaee95 LC |
553 | name x)))) |
554 | ||
555 | (define %random-state | |
556 | (delay | |
557 | (seed->random-state (logxor (getpid) (car (gettimeofday)))))) | |
558 | ||
987a29ba LC |
559 | (define* (nonce #:optional (name (gethostname))) |
560 | (string-append name "-" | |
aebaee95 LC |
561 | (number->string (random 1000000 (force %random-state))))) |
562 | ||
563 | (define (assert-node-can-import node name daemon-socket) | |
564 | "Bail out if NODE refuses to import our archives." | |
565 | (let ((session (node-session node))) | |
566 | (with-store store | |
567 | (let* ((item (add-text-to-store store "export-test" (nonce))) | |
568 | (remote (connect-to-remote-daemon session daemon-socket))) | |
987a29ba LC |
569 | (with-store local |
570 | (send-files local (list item) remote)) | |
571 | ||
aebaee95 | 572 | (if (valid-path? remote item) |
69daee23 | 573 | (info (G_ "'~a' successfully imported '~a'~%") |
aebaee95 | 574 | name item) |
69daee23 | 575 | (leave (G_ "'~a' was not properly imported on '~a'~%") |
aebaee95 LC |
576 | item name)))))) |
577 | ||
578 | (define (assert-node-can-export node name daemon-socket) | |
579 | "Bail out if we cannot import signed archives from NODE." | |
580 | (let* ((session (node-session node)) | |
581 | (remote (connect-to-remote-daemon session daemon-socket)) | |
987a29ba | 582 | (item (add-text-to-store remote "import-test" (nonce name)))) |
aebaee95 | 583 | (with-store store |
987a29ba | 584 | (if (and (retrieve-files store (list item) remote) |
aebaee95 | 585 | (valid-path? store item)) |
69daee23 | 586 | (info (G_ "successfully imported '~a' from '~a'~%") |
aebaee95 | 587 | item name) |
69daee23 | 588 | (leave (G_ "failed to import '~a' from '~a'~%") |
aebaee95 LC |
589 | item name))))) |
590 | ||
27991c97 LC |
591 | (define (check-machine-availability machine-file pred) |
592 | "Check that each machine matching PRED in MACHINE-FILE is usable as a build | |
593 | machine." | |
2b513387 LC |
594 | (define (build-machine=? m1 m2) |
595 | (and (string=? (build-machine-name m1) (build-machine-name m2)) | |
596 | (= (build-machine-port m1) (build-machine-port m2)))) | |
597 | ||
598 | ;; A given build machine may appear several times (e.g., once for | |
599 | ;; "x86_64-linux" and a second time for "i686-linux"); test them only once. | |
27991c97 LC |
600 | (let ((machines (filter pred |
601 | (delete-duplicates (build-machines machine-file) | |
602 | build-machine=?)))) | |
69daee23 | 603 | (info (G_ "testing ~a build machines defined in '~a'...~%") |
aebaee95 LC |
604 | (length machines) machine-file) |
605 | (let* ((names (map build-machine-name machines)) | |
606 | (sockets (map build-machine-daemon-socket machines)) | |
607 | (sessions (map open-ssh-session machines)) | |
608 | (nodes (map make-node sessions))) | |
609 | (for-each assert-node-repl nodes names) | |
610 | (for-each assert-node-has-guix nodes names) | |
611 | (for-each assert-node-can-import nodes names sockets) | |
612 | (for-each assert-node-can-export nodes names sockets)))) | |
613 | ||
614 | \f | |
49e6291a LC |
615 | ;;; |
616 | ;;; Entry point. | |
617 | ;;; | |
618 | ||
619 | (define (guix-offload . args) | |
620 | (define request-line-rx | |
621 | ;; The request format. See 'tryBuildHook' method in build.cc. | |
622 | (make-regexp "([01]) ([a-z0-9_-]+) (/[[:graph:]]+.drv) ([[:graph:]]*)")) | |
623 | ||
624 | (define not-coma | |
625 | (char-set-complement (char-set #\,))) | |
626 | ||
627 | ;; Make sure $HOME really corresponds to the current user. This is | |
628 | ;; necessary since lsh uses that to determine the location of the yarrow | |
629 | ;; seed file, and fails if it's owned by someone else. | |
630 | (and=> (passwd:dir (getpw (getuid))) | |
631 | (cut setenv "HOME" <>)) | |
632 | ||
0b724753 LC |
633 | ;; We rely on protocol-level compression from libssh to optimize large data |
634 | ;; transfers. Warn if it's missing. | |
635 | (unless (zlib-support?) | |
69daee23 LC |
636 | (warning (G_ "Guile-SSH lacks zlib support")) |
637 | (warning (G_ "data transfers will *not* be compressed!"))) | |
0b724753 | 638 | |
49e6291a LC |
639 | (match args |
640 | ((system max-silent-time print-build-trace? build-timeout) | |
641 | (let ((max-silent-time (string->number max-silent-time)) | |
642 | (build-timeout (string->number build-timeout)) | |
643 | (print-build-trace? (string=? print-build-trace? "1"))) | |
644 | (parameterize ((%current-system system)) | |
645 | (let loop ((line (read-line))) | |
646 | (unless (eof-object? line) | |
647 | (cond ((regexp-exec request-line-rx line) | |
648 | => | |
649 | (lambda (match) | |
e8a5db80 | 650 | (with-error-handling |
49e6291a LC |
651 | (process-request (equal? (match:substring match 1) "1") |
652 | (match:substring match 2) ; system | |
653 | (call-with-input-file | |
654 | (match:substring match 3) | |
655 | read-derivation) | |
656 | (string-tokenize | |
657 | (match:substring match 4) not-coma) | |
658 | #:print-build-trace? print-build-trace? | |
659 | #:max-silent-time max-silent-time | |
660 | #:build-timeout build-timeout)))) | |
661 | (else | |
69daee23 | 662 | (leave (G_ "invalid request line: ~s~%") line))) |
49e6291a | 663 | (loop (read-line))))))) |
aebaee95 LC |
664 | (("test" rest ...) |
665 | (with-error-handling | |
27991c97 LC |
666 | (let-values (((file pred) |
667 | (match rest | |
668 | ((file regexp) | |
669 | (values file | |
670 | (compose (cut string-match regexp <>) | |
671 | build-machine-name))) | |
672 | ((file) (values file (const #t))) | |
673 | (() (values %machine-file (const #t))) | |
69daee23 | 674 | (x (leave (G_ "wrong number of arguments~%")))))) |
27991c97 | 675 | (check-machine-availability (or file %machine-file) pred)))) |
49e6291a LC |
676 | (("--version") |
677 | (show-version-and-exit "guix offload")) | |
678 | (("--help") | |
69daee23 | 679 | (format #t (G_ "Usage: guix offload SYSTEM PRINT-BUILD-TRACE |
49e6291a LC |
680 | Process build offload requests written on the standard input, possibly |
681 | offloading builds to the machines listed in '~a'.~%") | |
682 | %machine-file) | |
69daee23 | 683 | (display (G_ " |
49e6291a LC |
684 | This tool is meant to be used internally by 'guix-daemon'.\n")) |
685 | (show-bug-report-information)) | |
686 | (x | |
69daee23 | 687 | (leave (G_ "invalid arguments: ~{~s ~}~%") x)))) |
49e6291a | 688 | |
f326fef8 | 689 | ;;; Local Variables: |
178f5828 | 690 | ;;; eval: (put 'with-machine-lock 'scheme-indent-function 2) |
4bf1eb4f | 691 | ;;; eval: (put 'with-file-lock 'scheme-indent-function 1) |
d81195bf | 692 | ;;; eval: (put 'with-error-to-port 'scheme-indent-function 1) |
f326fef8 LC |
693 | ;;; End: |
694 | ||
49e6291a | 695 | ;;; offload.scm ends here |