Commit | Line | Data |
---|---|---|
49e6291a | 1 | ;;; GNU Guix --- Functional package management for GNU |
0a40626f | 2 | ;;; Copyright © 2014, 2015, 2016 Ludovic Courtès <ludo@gnu.org> |
49e6291a LC |
3 | ;;; |
4 | ;;; This file is part of GNU Guix. | |
5 | ;;; | |
6 | ;;; GNU Guix is free software; you can redistribute it and/or modify it | |
7 | ;;; under the terms of the GNU General Public License as published by | |
8 | ;;; the Free Software Foundation; either version 3 of the License, or (at | |
9 | ;;; your option) any later version. | |
10 | ;;; | |
11 | ;;; GNU Guix is distributed in the hope that it will be useful, but | |
12 | ;;; WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | ;;; GNU General Public License for more details. | |
15 | ;;; | |
16 | ;;; You should have received a copy of the GNU General Public License | |
17 | ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. | |
18 | ||
19 | (define-module (guix scripts offload) | |
21531add LC |
20 | #:use-module (ssh key) |
21 | #:use-module (ssh auth) | |
22 | #:use-module (ssh session) | |
23 | #:use-module (ssh channel) | |
cf283dd9 LC |
24 | #:use-module (ssh popen) |
25 | #:use-module (ssh dist) | |
26 | #:use-module (ssh dist node) | |
0b724753 | 27 | #:use-module (ssh version) |
49e6291a LC |
28 | #:use-module (guix config) |
29 | #:use-module (guix records) | |
30 | #:use-module (guix store) | |
31 | #:use-module (guix derivations) | |
2535635f LC |
32 | #:use-module ((guix serialization) |
33 | #:select (nar-error? nar-error-file)) | |
49e6291a LC |
34 | #:use-module (guix nar) |
35 | #:use-module (guix utils) | |
0a40626f | 36 | #:use-module ((guix build syscalls) #:select (fcntl-flock)) |
f326fef8 | 37 | #:use-module ((guix build utils) #:select (which mkdir-p)) |
49e6291a LC |
38 | #:use-module (guix ui) |
39 | #:use-module (srfi srfi-1) | |
6c41cce0 | 40 | #:use-module (srfi srfi-11) |
49e6291a LC |
41 | #:use-module (srfi srfi-26) |
42 | #:use-module (srfi srfi-34) | |
43 | #:use-module (srfi srfi-35) | |
44 | #:use-module (ice-9 popen) | |
45 | #:use-module (ice-9 rdelim) | |
46 | #:use-module (ice-9 match) | |
47 | #:use-module (ice-9 regex) | |
48 | #:use-module (ice-9 format) | |
2535635f | 49 | #:use-module (ice-9 binary-ports) |
49e6291a LC |
50 | #:export (build-machine |
51 | build-requirements | |
52 | guix-offload)) | |
53 | ||
54 | ;;; Commentary: | |
55 | ;;; | |
56 | ;;; Attempt to offload builds to the machines listed in | |
57 | ;;; /etc/guix/machines.scm, transferring missing dependencies over SSH, and | |
58 | ;;; retrieving the build output(s) over SSH upon success. | |
59 | ;;; | |
60 | ;;; This command should not be used directly; instead, it is called on-demand | |
61 | ;;; by the daemon, unless it was started with '--no-build-hook' or a client | |
62 | ;;; inhibited build hooks. | |
63 | ;;; | |
64 | ;;; Code: | |
65 | ||
66 | ||
67 | (define-record-type* <build-machine> | |
68 | build-machine make-build-machine | |
69 | build-machine? | |
70 | (name build-machine-name) ; string | |
cecd72d5 LC |
71 | (port build-machine-port ; number |
72 | (default 22)) | |
49e6291a LC |
73 | (system build-machine-system) ; string |
74 | (user build-machine-user) ; string | |
75 | (private-key build-machine-private-key ; file name | |
21531add LC |
76 | (default (user-openssh-private-key))) |
77 | (host-key build-machine-host-key) ; string | |
1d48cf94 LC |
78 | (compression build-machine-compression ; string |
79 | (default "zlib@openssh.com,zlib")) | |
80 | (compression-level build-machine-compression-level ;integer | |
81 | (default 3)) | |
cf283dd9 LC |
82 | (daemon-socket build-machine-daemon-socket ; string |
83 | (default "/var/guix/daemon-socket/socket")) | |
49e6291a LC |
84 | (parallel-builds build-machine-parallel-builds ; number |
85 | (default 1)) | |
86 | (speed build-machine-speed ; inexact real | |
87 | (default 1.0)) | |
88 | (features build-machine-features ; list of strings | |
89 | (default '()))) | |
90 | ||
91 | (define-record-type* <build-requirements> | |
92 | build-requirements make-build-requirements | |
93 | build-requirements? | |
94 | (system build-requirements-system) ; string | |
95 | (features build-requirements-features ; list of strings | |
96 | (default '()))) | |
97 | ||
98 | (define %machine-file | |
99 | ;; File that lists machines available as build slaves. | |
100 | (string-append %config-directory "/machines.scm")) | |
101 | ||
21531add LC |
102 | (define (user-openssh-private-key) |
103 | "Return the user's default SSH private key, or #f if it could not be | |
49e6291a LC |
104 | determined." |
105 | (and=> (getenv "HOME") | |
21531add | 106 | (cut string-append <> "/.ssh/id_rsa"))) |
49e6291a LC |
107 | |
108 | (define %user-module | |
109 | ;; Module in which the machine description file is loaded. | |
110 | (let ((module (make-fresh-user-module))) | |
111 | (module-use! module (resolve-interface '(guix scripts offload))) | |
112 | module)) | |
113 | ||
114 | (define* (build-machines #:optional (file %machine-file)) | |
115 | "Read the list of build machines from FILE and return it." | |
116 | (catch #t | |
117 | (lambda () | |
118 | ;; Avoid ABI incompatibility with the <build-machine> record. | |
119 | (set! %fresh-auto-compile #t) | |
120 | ||
121 | (save-module-excursion | |
122 | (lambda () | |
123 | (set-current-module %user-module) | |
92cb2e28 | 124 | (primitive-load file)))) |
49e6291a LC |
125 | (lambda args |
126 | (match args | |
e465d9e1 | 127 | (('system-error . rest) |
49e6291a LC |
128 | (let ((err (system-error-errno args))) |
129 | ;; Silently ignore missing file since this is a common case. | |
130 | (if (= ENOENT err) | |
131 | '() | |
132 | (leave (_ "failed to open machine file '~a': ~a~%") | |
92cb2e28 | 133 | file (strerror err))))) |
c1202fb1 LC |
134 | (('syntax-error proc message properties form . rest) |
135 | (let ((loc (source-properties->location properties))) | |
136 | (leave (_ "~a: ~a~%") | |
137 | (location->string loc) message))) | |
e465d9e1 | 138 | (x |
49e6291a | 139 | (leave (_ "failed to load machine file '~a': ~s~%") |
92cb2e28 | 140 | file args)))))) |
49e6291a | 141 | |
21531add LC |
142 | (define (host-key->type+key host-key) |
143 | "Destructure HOST-KEY, an OpenSSH host key string, and return two values: | |
144 | its key type as a symbol, and the actual base64-encoded string." | |
145 | (define (type->symbol type) | |
146 | (and (string-prefix? "ssh-" type) | |
147 | (string->symbol (string-drop type 4)))) | |
148 | ||
149 | (match (string-tokenize host-key) | |
150 | ((type key _) | |
151 | (values (type->symbol type) key)) | |
152 | ((type key) | |
153 | (values (type->symbol type) key)))) | |
154 | ||
155 | (define (private-key-from-file* file) | |
156 | "Like 'private-key-from-file', but raise an error that 'with-error-handling' | |
157 | can interpret meaningfully." | |
158 | (catch 'guile-ssh-error | |
159 | (lambda () | |
160 | (private-key-from-file file)) | |
161 | (lambda (key proc str . rest) | |
162 | (raise (condition | |
163 | (&message (message (format #f (_ "failed to load SSH \ | |
164 | private key from '~a': ~a") | |
165 | file str)))))))) | |
166 | ||
167 | (define (open-ssh-session machine) | |
168 | "Open an SSH session for MACHINE and return it. Throw an error on failure." | |
169 | (let ((private (private-key-from-file* (build-machine-private-key machine))) | |
170 | (public (public-key-from-file | |
171 | (string-append (build-machine-private-key machine) | |
172 | ".pub"))) | |
173 | (session (make-session #:user (build-machine-user machine) | |
174 | #:host (build-machine-name machine) | |
175 | #:port (build-machine-port machine) | |
8d125cfc | 176 | #:timeout 10 ;seconds |
21531add LC |
177 | ;; #:log-verbosity 'protocol |
178 | #:identity (build-machine-private-key machine) | |
179 | ||
180 | ;; We need lightweight compression when | |
181 | ;; exchanging full archives. | |
1d48cf94 LC |
182 | #:compression |
183 | (build-machine-compression machine) | |
184 | #:compression-level | |
185 | (build-machine-compression-level machine)))) | |
74afca5d LC |
186 | (match (connect! session) |
187 | ('ok | |
188 | ;; Authenticate the server. XXX: Guile-SSH 0.10.1 doesn't know about | |
189 | ;; ed25519 keys and 'get-key-type' returns #f in that case. | |
190 | (let-values (((server) (get-server-public-key session)) | |
191 | ((type key) (host-key->type+key | |
192 | (build-machine-host-key machine)))) | |
193 | (unless (and (or (not (get-key-type server)) | |
194 | (eq? (get-key-type server) type)) | |
195 | (string=? (public-key->string server) key)) | |
196 | ;; Key mismatch: something's wrong. XXX: It could be that the server | |
197 | ;; provided its Ed25519 key when we where expecting its RSA key. | |
198 | (leave (_ "server at '~a' returned host key '~a' of type '~a' \ | |
21531add | 199 | instead of '~a' of type '~a'~%") |
74afca5d LC |
200 | (build-machine-name machine) |
201 | (public-key->string server) (get-key-type server) | |
202 | key type))) | |
203 | ||
204 | (let ((auth (userauth-public-key! session private))) | |
205 | (unless (eq? 'success auth) | |
206 | (disconnect! session) | |
207 | (leave (_ "SSH public key authentication failed for '~a': ~a~%") | |
208 | (build-machine-name machine) (get-error session)))) | |
209 | ||
210 | session) | |
211 | (x | |
212 | ;; Connection failed or timeout expired. | |
213 | (leave (_ "failed to connect to '~a': ~a~%") | |
214 | (build-machine-name machine) (get-error session)))))) | |
21531add | 215 | |
cf283dd9 LC |
216 | (define* (connect-to-remote-daemon session |
217 | #:optional | |
218 | (socket-name "/var/guix/daemon-socket/socket")) | |
219 | "Connect to the remote build daemon listening on SOCKET-NAME over SESSION, | |
220 | an SSH session. Return a <nix-server> object." | |
221 | (define redirect | |
222 | ;; Code run in SESSION to redirect the remote process' stdin/stdout to the | |
223 | ;; daemon's socket, à la socat. The SSH protocol supports forwarding to | |
224 | ;; Unix-domain sockets but libssh doesn't have an API for that, hence this | |
225 | ;; hack. | |
226 | `(begin | |
227 | (use-modules (ice-9 match) (rnrs io ports)) | |
228 | ||
229 | (let ((sock (socket AF_UNIX SOCK_STREAM 0)) | |
230 | (stdin (current-input-port)) | |
231 | (stdout (current-output-port))) | |
232 | (setvbuf stdin _IONBF) | |
233 | (setvbuf stdout _IONBF) | |
234 | (connect sock AF_UNIX ,socket-name) | |
235 | ||
236 | (let loop () | |
237 | (match (select (list stdin sock) '() (list stdin stdout sock)) | |
238 | ((reads writes ()) | |
239 | (when (memq stdin reads) | |
240 | (match (get-bytevector-some stdin) | |
241 | ((? eof-object?) | |
242 | (primitive-exit 0)) | |
243 | (bv | |
244 | (put-bytevector sock bv)))) | |
245 | (when (memq sock reads) | |
246 | (match (get-bytevector-some sock) | |
247 | ((? eof-object?) | |
248 | (primitive-exit 0)) | |
249 | (bv | |
250 | (put-bytevector stdout bv)))) | |
251 | (loop)) | |
252 | (_ | |
253 | (primitive-exit 1))))))) | |
254 | ||
255 | (let ((channel | |
256 | (open-remote-pipe* session OPEN_BOTH | |
257 | ;; Sort-of shell-quote REDIRECT. | |
258 | "guile" "-c" | |
259 | (object->string | |
260 | (object->string redirect))))) | |
261 | (open-connection #:port channel))) | |
262 | ||
59f704df LC |
263 | \f |
264 | ;;; | |
265 | ;;; Synchronization. | |
266 | ;;; | |
267 | ||
268 | (define (lock-file file) | |
269 | "Wait and acquire an exclusive lock on FILE. Return an open port." | |
270 | (mkdir-p (dirname file)) | |
271 | (let ((port (open-file file "w0"))) | |
272 | (fcntl-flock port 'write-lock) | |
273 | port)) | |
274 | ||
275 | (define (unlock-file lock) | |
276 | "Unlock LOCK." | |
277 | (fcntl-flock lock 'unlock) | |
278 | (close-port lock) | |
279 | #t) | |
280 | ||
281 | (define-syntax-rule (with-file-lock file exp ...) | |
282 | "Wait to acquire a lock on FILE and evaluate EXP in that context." | |
283 | (let ((port (lock-file file))) | |
284 | (dynamic-wind | |
285 | (lambda () | |
286 | #t) | |
287 | (lambda () | |
288 | exp ...) | |
289 | (lambda () | |
290 | (unlock-file port))))) | |
291 | ||
292 | (define-syntax-rule (with-machine-lock machine hint exp ...) | |
293 | "Wait to acquire MACHINE's exclusive lock for HINT, and evaluate EXP in that | |
294 | context." | |
295 | (with-file-lock (machine-lock-file machine hint) | |
296 | exp ...)) | |
297 | ||
298 | ||
299 | (define (machine-slot-file machine slot) | |
300 | "Return the file name of MACHINE's file for SLOT." | |
301 | ;; For each machine we have a bunch of files representing each build slot. | |
302 | ;; When choosing a build machine, we attempt to get an exclusive lock on one | |
303 | ;; of these; if we fail, that means all the build slots are already taken. | |
304 | ;; Inspired by Nix's build-remote.pl. | |
305 | (string-append (string-append %state-directory "/offload/" | |
306 | (build-machine-name machine) | |
307 | "/" (number->string slot)))) | |
308 | ||
309 | (define (acquire-build-slot machine) | |
310 | "Attempt to acquire a build slot on MACHINE. Return the port representing | |
311 | the slot, or #f if none is available. | |
312 | ||
313 | This mechanism allows us to set a hard limit on the number of simultaneous | |
314 | connections allowed to MACHINE." | |
315 | (mkdir-p (dirname (machine-slot-file machine 0))) | |
316 | (with-machine-lock machine 'slots | |
317 | (any (lambda (slot) | |
318 | (let ((port (open-file (machine-slot-file machine slot) | |
319 | "w0"))) | |
320 | (catch 'flock-error | |
321 | (lambda () | |
322 | (fcntl-flock port 'write-lock #:wait? #f) | |
323 | ;; Got it! | |
324 | (format (current-error-port) | |
325 | "process ~a acquired build slot '~a'~%" | |
326 | (getpid) (port-filename port)) | |
327 | port) | |
328 | (lambda args | |
329 | ;; PORT is already locked by another process. | |
330 | (close-port port) | |
331 | #f)))) | |
332 | (iota (build-machine-parallel-builds machine))))) | |
333 | ||
334 | (define (release-build-slot slot) | |
335 | "Release SLOT, a build slot as returned as by 'acquire-build-slot'." | |
336 | (close-port slot)) | |
337 | ||
338 | \f | |
339 | ;;; | |
340 | ;;; Offloading. | |
341 | ;;; | |
342 | ||
d81195bf LC |
343 | (define (build-log-port) |
344 | "Return the default port where build logs should be sent. The default is | |
345 | file descriptor 4, which is open by the daemon before running the offload | |
346 | hook." | |
347 | (let ((port (fdopen 4 "w0"))) | |
348 | ;; Make sure file descriptor 4 isn't closed when PORT is GC'd. | |
349 | (set-port-revealed! port 1) | |
350 | port)) | |
351 | ||
88da0b68 LC |
352 | (define* (transfer-and-offload drv machine |
353 | #:key | |
354 | (inputs '()) | |
355 | (outputs '()) | |
356 | (max-silent-time 3600) | |
714084e6 | 357 | build-timeout |
88da0b68 LC |
358 | print-build-trace?) |
359 | "Offload DRV to MACHINE. Prior to the actual offloading, transfer all of | |
360 | INPUTS to MACHINE; if building DRV succeeds, retrieve all of OUTPUTS from | |
361 | MACHINE." | |
9e76eed3 LC |
362 | (define session |
363 | (open-ssh-session machine)) | |
364 | ||
cf283dd9 LC |
365 | (define store |
366 | (connect-to-remote-daemon session | |
367 | (build-machine-daemon-socket machine))) | |
368 | ||
369 | (set-build-options store | |
370 | #:print-build-trace print-build-trace? | |
371 | #:max-silent-time max-silent-time | |
372 | #:timeout build-timeout) | |
373 | ||
374 | ;; Protect DRV from garbage collection. | |
375 | (add-temp-root store (derivation-file-name drv)) | |
376 | ||
377 | (send-files (cons (derivation-file-name drv) inputs) | |
378 | store) | |
379 | (format (current-error-port) "offloading '~a' to '~a'...~%" | |
380 | (derivation-file-name drv) (build-machine-name machine)) | |
381 | (format (current-error-port) "@ build-remote ~a ~a~%" | |
382 | (derivation-file-name drv) (build-machine-name machine)) | |
383 | ||
384 | (guard (c ((nix-protocol-error? c) | |
385 | (format (current-error-port) | |
386 | (_ "derivation '~a' offloaded to '~a' failed: ~a~%") | |
387 | (derivation-file-name drv) | |
388 | (build-machine-name machine) | |
389 | (nix-protocol-error-message c)) | |
390 | ;; Use exit code 100 for a permanent build failure. The daemon | |
391 | ;; interprets other non-zero codes as transient build failures. | |
392 | (primitive-exit 100))) | |
0237d797 LC |
393 | (parameterize ((current-build-output-port (build-log-port))) |
394 | (build-derivations store (list drv)))) | |
cf283dd9 LC |
395 | |
396 | (retrieve-files outputs store) | |
397 | (format (current-error-port) "done with offloaded '~a'~%" | |
398 | (derivation-file-name drv))) | |
399 | ||
400 | (define (store-import-channel session) | |
401 | "Return an output port to which archives to be exported to SESSION's store | |
402 | can be written." | |
403 | ;; Using the 'import-paths' RPC on a remote store would be slow because it | |
404 | ;; makes a round trip every time 32 KiB have been transferred. This | |
405 | ;; procedure instead opens a separate channel to use the remote | |
406 | ;; 'import-paths' procedure, which consumes all the data in a single round | |
407 | ;; trip. | |
408 | (define import | |
409 | `(begin | |
410 | (use-modules (guix)) | |
411 | ||
412 | (with-store store | |
413 | (setvbuf (current-input-port) _IONBF) | |
414 | (import-paths store (current-input-port))))) | |
415 | ||
416 | (open-remote-output-pipe session | |
417 | (string-join | |
418 | `("guile" "-c" | |
419 | ,(object->string | |
420 | (object->string import)))))) | |
421 | ||
422 | (define (store-export-channel session files) | |
423 | "Return an input port from which an export of FILES from SESSION's store can | |
424 | be read." | |
425 | ;; Same as above: this is more efficient than calling 'export-paths' on a | |
426 | ;; remote store. | |
427 | (define export | |
428 | `(begin | |
429 | (use-modules (guix)) | |
430 | ||
431 | (with-store store | |
432 | (setvbuf (current-output-port) _IONBF) | |
433 | (export-paths store ',files (current-output-port))))) | |
434 | ||
435 | (open-remote-input-pipe session | |
436 | (string-join | |
437 | `("guile" "-c" | |
438 | ,(object->string | |
439 | (object->string export)))))) | |
49e6291a | 440 | |
cf283dd9 LC |
441 | (define (send-files files remote) |
442 | "Send the subset of FILES that's missing to REMOTE, a remote store." | |
49e6291a | 443 | (with-store store |
f3cf8606 LC |
444 | ;; Compute the subset of FILES missing on SESSION and send them. |
445 | (let* ((session (channel-get-session (nix-server-socket remote))) | |
cf283dd9 LC |
446 | (node (make-node session)) |
447 | (missing (node-eval node | |
448 | `(begin | |
449 | (use-modules (guix) | |
450 | (srfi srfi-1) (srfi srfi-26)) | |
451 | ||
452 | (with-store store | |
453 | (remove (cut valid-path? store <>) | |
f3cf8606 | 454 | ',files))))) |
e11c42f2 | 455 | (count (length missing)) |
cf283dd9 | 456 | (port (store-import-channel session))) |
e11c42f2 LC |
457 | (format #t (N_ "sending ~a store item to '~a'...~%" |
458 | "sending ~a store items to '~a'...~%" count) | |
459 | count (session-get session 'host)) | |
cf283dd9 | 460 | |
f3cf8606 | 461 | ;; Send MISSING in topological order. |
cf283dd9 LC |
462 | (export-paths store missing port) |
463 | ||
464 | ;; Tell the remote process that we're done. (In theory the | |
465 | ;; end-of-archive mark of 'export-paths' would be enough, but in | |
466 | ;; practice it's not.) | |
467 | (channel-send-eof port) | |
468 | ||
469 | ;; Wait for completion of the remote process. | |
470 | (let ((result (zero? (channel-get-exit-status port)))) | |
471 | (close-port port) | |
472 | result)))) | |
473 | ||
474 | (define (retrieve-files files remote) | |
9e76eed3 | 475 | "Retrieve FILES from SESSION's store, and import them." |
cf283dd9 LC |
476 | (let* ((session (channel-get-session (nix-server-socket remote))) |
477 | (host (session-get session 'host)) | |
e11c42f2 LC |
478 | (port (store-export-channel session files)) |
479 | (count (length files))) | |
480 | (format #t (N_ "retrieving ~a store item from '~a'...~%" | |
481 | "retrieving ~a store items from '~a'...~%" count) | |
482 | count host) | |
cf283dd9 LC |
483 | |
484 | ;; We cannot use the 'import-paths' RPC here because we already | |
485 | ;; hold the locks for FILES. | |
486 | (let ((result (restore-file-set port | |
487 | #:log-port (current-error-port) | |
488 | #:lock? #f))) | |
489 | (close-port port) | |
490 | result))) | |
49e6291a | 491 | |
59f704df LC |
492 | \f |
493 | ;;; | |
494 | ;;; Scheduling. | |
495 | ;;; | |
496 | ||
49e6291a LC |
497 | (define (machine-matches? machine requirements) |
498 | "Return #t if MACHINE matches REQUIREMENTS." | |
499 | (and (string=? (build-requirements-system requirements) | |
500 | (build-machine-system machine)) | |
501 | (lset<= string=? | |
502 | (build-requirements-features requirements) | |
503 | (build-machine-features machine)))) | |
504 | ||
165f4b2a LC |
505 | (define (machine-load machine) |
506 | "Return the load of MACHINE, divided by the number of parallel builds | |
463fb7d0 | 507 | allowed on MACHINE. Return +∞ if MACHINE is unreachable." |
1cd1d8a7 | 508 | ;; Note: This procedure is costly since it creates a new SSH session. |
463fb7d0 LC |
509 | (match (false-if-exception (open-ssh-session machine)) |
510 | ((? session? session) | |
511 | (let* ((pipe (open-remote-pipe* session OPEN_READ | |
bc1ad4e3 | 512 | "cat" "/proc/loadavg")) |
463fb7d0 LC |
513 | (line (read-line pipe))) |
514 | (close-port pipe) | |
515 | ||
516 | (if (eof-object? line) | |
517 | +inf.0 ;MACHINE does not respond, so assume it is infinitely loaded | |
518 | (match (string-tokenize line) | |
519 | ((one five fifteen . _) | |
520 | (let* ((raw (string->number five)) | |
521 | (jobs (build-machine-parallel-builds machine)) | |
522 | (normalized (/ raw jobs))) | |
523 | (format (current-error-port) "load on machine '~a' is ~s\ | |
165f4b2a | 524 | (normalized: ~s)~%" |
463fb7d0 LC |
525 | (build-machine-name machine) raw normalized) |
526 | normalized)) | |
527 | (_ | |
528 | +inf.0))))) ;something's fishy about MACHINE, so avoid it | |
529 | (_ | |
530 | +inf.0))) ;failed to connect to MACHINE, so avoid it | |
165f4b2a | 531 | |
178f5828 LC |
532 | (define (machine-lock-file machine hint) |
533 | "Return the name of MACHINE's lock file for HINT." | |
f326fef8 | 534 | (string-append %state-directory "/offload/" |
178f5828 LC |
535 | (build-machine-name machine) |
536 | "." (symbol->string hint) ".lock")) | |
f326fef8 | 537 | |
d652b851 LC |
538 | (define (machine-choice-lock-file) |
539 | "Return the name of the file used as a lock when choosing a build machine." | |
540 | (string-append %state-directory "/offload/machine-choice.lock")) | |
541 | ||
d652b851 | 542 | |
5cc569dc LC |
543 | (define %slots |
544 | ;; List of acquired build slots (open ports). | |
545 | '()) | |
546 | ||
88da0b68 LC |
547 | (define (choose-build-machine machines) |
548 | "Return the best machine among MACHINES, or #f." | |
d652b851 LC |
549 | |
550 | ;; Proceed like this: | |
551 | ;; 1. Acquire the global machine-choice lock. | |
552 | ;; 2. For all MACHINES, attempt to acquire a build slot, and filter out | |
553 | ;; those machines for which we failed. | |
554 | ;; 3. Choose the best machine among those that are left. | |
555 | ;; 4. Release the previously-acquired build slots of the other machines. | |
556 | ;; 5. Release the global machine-choice lock. | |
557 | ||
558 | (with-file-lock (machine-choice-lock-file) | |
1cd1d8a7 | 559 | (define machines+slots+loads |
7df3ade1 | 560 | (filter-map (lambda (machine) |
1cd1d8a7 LC |
561 | ;; Call 'machine-load' from here to make sure it is called |
562 | ;; only once per machine (it is expensive). | |
7df3ade1 | 563 | (let ((slot (acquire-build-slot machine))) |
1cd1d8a7 LC |
564 | (and slot |
565 | (list machine slot (machine-load machine))))) | |
7df3ade1 | 566 | machines)) |
d652b851 LC |
567 | |
568 | (define (undecorate pred) | |
1a8ea0a1 LC |
569 | (lambda (a b) |
570 | (match a | |
1cd1d8a7 | 571 | ((machine1 slot1 load1) |
1a8ea0a1 | 572 | (match b |
1cd1d8a7 LC |
573 | ((machine2 slot2 load2) |
574 | (pred machine1 load1 machine2 load2))))))) | |
575 | ||
576 | (define (machine-less-loaded-or-faster? m1 l1 m2 l2) | |
577 | ;; Return #t if M1 is either less loaded or faster than M2, with L1 | |
578 | ;; being the load of M1 and L2 the load of M2. (This relation defines a | |
579 | ;; total order on machines.) | |
580 | (> (/ (build-machine-speed m1) (+ 1 l1)) | |
581 | (/ (build-machine-speed m2) (+ 1 l2)))) | |
582 | ||
583 | (let loop ((machines+slots+loads | |
584 | (sort machines+slots+loads | |
b1fea303 | 585 | (undecorate machine-less-loaded-or-faster?)))) |
1cd1d8a7 LC |
586 | (match machines+slots+loads |
587 | (((best slot load) others ...) | |
d652b851 | 588 | ;; Return the best machine unless it's already overloaded. |
1cd1d8a7 | 589 | (if (< load 2.) |
b1fea303 | 590 | (match others |
1cd1d8a7 | 591 | (((machines slots loads) ...) |
b1fea303 LC |
592 | ;; Release slots from the uninteresting machines. |
593 | (for-each release-build-slot slots) | |
594 | ||
595 | ;; Prevent SLOT from being GC'd. | |
596 | (set! %slots (cons slot %slots)) | |
597 | best)) | |
5cc569dc | 598 | (begin |
b1fea303 | 599 | ;; BEST is overloaded, so try the next one. |
d652b851 | 600 | (release-build-slot slot) |
b1fea303 | 601 | (loop others)))) |
d652b851 | 602 | (() #f))))) |
49e6291a LC |
603 | |
604 | (define* (process-request wants-local? system drv features | |
605 | #:key | |
606 | print-build-trace? (max-silent-time 3600) | |
714084e6 | 607 | build-timeout) |
49e6291a | 608 | "Process a request to build DRV." |
88da0b68 LC |
609 | (let* ((local? (and wants-local? (string=? system (%current-system)))) |
610 | (reqs (build-requirements | |
611 | (system system) | |
612 | (features features))) | |
613 | (candidates (filter (cut machine-matches? <> reqs) | |
614 | (build-machines)))) | |
615 | (match candidates | |
616 | (() | |
617 | ;; We'll never be able to match REQS. | |
618 | (display "# decline\n")) | |
619 | ((_ ...) | |
620 | (let ((machine (choose-build-machine candidates))) | |
621 | (if machine | |
622 | (begin | |
623 | ;; Offload DRV to MACHINE. | |
624 | (display "# accept\n") | |
625 | (let ((inputs (string-tokenize (read-line))) | |
626 | (outputs (string-tokenize (read-line)))) | |
627 | (transfer-and-offload drv machine | |
628 | #:inputs inputs | |
629 | #:outputs outputs | |
630 | #:max-silent-time max-silent-time | |
631 | #:build-timeout build-timeout | |
632 | #:print-build-trace? print-build-trace?))) | |
633 | ||
634 | ;; Not now, all the machines are busy. | |
635 | (display "# postpone\n"))))))) | |
49e6291a | 636 | |
49e6291a | 637 | \f |
aebaee95 LC |
638 | ;;; |
639 | ;;; Installation tests. | |
640 | ;;; | |
641 | ||
642 | (define (assert-node-repl node name) | |
643 | "Bail out if NODE is not running Guile." | |
644 | (match (node-guile-version node) | |
645 | (#f | |
646 | (leave (_ "Guile could not be started on '~a'~%") | |
647 | name)) | |
648 | ((? string? version) | |
649 | ;; Note: The version string already contains the word "Guile". | |
650 | (info (_ "'~a' is running ~a~%") | |
651 | name (node-guile-version node))))) | |
652 | ||
653 | (define (assert-node-has-guix node name) | |
654 | "Bail out if NODE lacks the (guix) module, or if its daemon is not running." | |
655 | (match (node-eval node | |
656 | '(begin | |
657 | (use-modules (guix)) | |
658 | (with-store store | |
659 | (add-text-to-store store "test" | |
660 | "Hello, build machine!")))) | |
661 | ((? string? str) | |
662 | (info (_ "Guix is usable on '~a' (test returned ~s)~%") | |
663 | name str)) | |
664 | (x | |
665 | (leave (_ "failed to use Guix module on '~a' (test returned ~s)~%") | |
666 | name x)))) | |
667 | ||
668 | (define %random-state | |
669 | (delay | |
670 | (seed->random-state (logxor (getpid) (car (gettimeofday)))))) | |
671 | ||
672 | (define (nonce) | |
673 | (string-append (gethostname) "-" | |
674 | (number->string (random 1000000 (force %random-state))))) | |
675 | ||
676 | (define (assert-node-can-import node name daemon-socket) | |
677 | "Bail out if NODE refuses to import our archives." | |
678 | (let ((session (node-session node))) | |
679 | (with-store store | |
680 | (let* ((item (add-text-to-store store "export-test" (nonce))) | |
681 | (remote (connect-to-remote-daemon session daemon-socket))) | |
682 | (send-files (list item) remote) | |
683 | (if (valid-path? remote item) | |
684 | (info (_ "'~a' successfully imported '~a'~%") | |
685 | name item) | |
686 | (leave (_ "'~a' was not properly imported on '~a'~%") | |
687 | item name)))))) | |
688 | ||
689 | (define (assert-node-can-export node name daemon-socket) | |
690 | "Bail out if we cannot import signed archives from NODE." | |
691 | (let* ((session (node-session node)) | |
692 | (remote (connect-to-remote-daemon session daemon-socket)) | |
693 | (item (add-text-to-store remote "import-test" (nonce))) | |
694 | (port (store-export-channel session (list item)))) | |
695 | (with-store store | |
696 | (if (and (import-paths store port) | |
697 | (valid-path? store item)) | |
698 | (info (_ "successfully imported '~a' from '~a'~%") | |
699 | item name) | |
700 | (leave (_ "failed to import '~a' from '~a'~%") | |
701 | item name))))) | |
702 | ||
703 | (define (check-machine-availability machine-file) | |
704 | "Check that each machine in MACHINE-FILE is usable as a build machine." | |
705 | (let ((machines (build-machines machine-file))) | |
706 | (info (_ "testing ~a build machines defined in '~a'...~%") | |
707 | (length machines) machine-file) | |
708 | (let* ((names (map build-machine-name machines)) | |
709 | (sockets (map build-machine-daemon-socket machines)) | |
710 | (sessions (map open-ssh-session machines)) | |
711 | (nodes (map make-node sessions))) | |
712 | (for-each assert-node-repl nodes names) | |
713 | (for-each assert-node-has-guix nodes names) | |
714 | (for-each assert-node-can-import nodes names sockets) | |
715 | (for-each assert-node-can-export nodes names sockets)))) | |
716 | ||
717 | \f | |
49e6291a LC |
718 | ;;; |
719 | ;;; Entry point. | |
720 | ;;; | |
721 | ||
722 | (define (guix-offload . args) | |
723 | (define request-line-rx | |
724 | ;; The request format. See 'tryBuildHook' method in build.cc. | |
725 | (make-regexp "([01]) ([a-z0-9_-]+) (/[[:graph:]]+.drv) ([[:graph:]]*)")) | |
726 | ||
727 | (define not-coma | |
728 | (char-set-complement (char-set #\,))) | |
729 | ||
730 | ;; Make sure $HOME really corresponds to the current user. This is | |
731 | ;; necessary since lsh uses that to determine the location of the yarrow | |
732 | ;; seed file, and fails if it's owned by someone else. | |
733 | (and=> (passwd:dir (getpw (getuid))) | |
734 | (cut setenv "HOME" <>)) | |
735 | ||
0b724753 LC |
736 | ;; We rely on protocol-level compression from libssh to optimize large data |
737 | ;; transfers. Warn if it's missing. | |
738 | (unless (zlib-support?) | |
739 | (warning (_ "Guile-SSH lacks zlib support")) | |
740 | (warning (_ "data transfers will *not* be compressed!"))) | |
741 | ||
49e6291a LC |
742 | (match args |
743 | ((system max-silent-time print-build-trace? build-timeout) | |
744 | (let ((max-silent-time (string->number max-silent-time)) | |
745 | (build-timeout (string->number build-timeout)) | |
746 | (print-build-trace? (string=? print-build-trace? "1"))) | |
747 | (parameterize ((%current-system system)) | |
748 | (let loop ((line (read-line))) | |
749 | (unless (eof-object? line) | |
750 | (cond ((regexp-exec request-line-rx line) | |
751 | => | |
752 | (lambda (match) | |
e8a5db80 | 753 | (with-error-handling |
49e6291a LC |
754 | (process-request (equal? (match:substring match 1) "1") |
755 | (match:substring match 2) ; system | |
756 | (call-with-input-file | |
757 | (match:substring match 3) | |
758 | read-derivation) | |
759 | (string-tokenize | |
760 | (match:substring match 4) not-coma) | |
761 | #:print-build-trace? print-build-trace? | |
762 | #:max-silent-time max-silent-time | |
763 | #:build-timeout build-timeout)))) | |
764 | (else | |
765 | (leave (_ "invalid request line: ~s~%") line))) | |
766 | (loop (read-line))))))) | |
aebaee95 LC |
767 | (("test" rest ...) |
768 | (with-error-handling | |
769 | (let ((file (match rest | |
770 | ((file) file) | |
771 | (() %machine-file) | |
772 | (_ (leave (_ "wrong number of arguments~%")))))) | |
773 | (check-machine-availability (or file %machine-file))))) | |
49e6291a LC |
774 | (("--version") |
775 | (show-version-and-exit "guix offload")) | |
776 | (("--help") | |
777 | (format #t (_ "Usage: guix offload SYSTEM PRINT-BUILD-TRACE | |
778 | Process build offload requests written on the standard input, possibly | |
779 | offloading builds to the machines listed in '~a'.~%") | |
780 | %machine-file) | |
781 | (display (_ " | |
782 | This tool is meant to be used internally by 'guix-daemon'.\n")) | |
783 | (show-bug-report-information)) | |
784 | (x | |
785 | (leave (_ "invalid arguments: ~{~s ~}~%") x)))) | |
786 | ||
f326fef8 | 787 | ;;; Local Variables: |
178f5828 | 788 | ;;; eval: (put 'with-machine-lock 'scheme-indent-function 2) |
4bf1eb4f | 789 | ;;; eval: (put 'with-file-lock 'scheme-indent-function 1) |
d81195bf | 790 | ;;; eval: (put 'with-error-to-port 'scheme-indent-function 1) |
f326fef8 LC |
791 | ;;; End: |
792 | ||
49e6291a | 793 | ;;; offload.scm ends here |