Merge branch 'master' into staging
[jackhill/guix/guix.git] / gnu / build / linux-boot.scm
1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
3 ;;; Copyright © 2017 Mathieu Othacehe <m.othacehe@gmail.com>
4 ;;; Copyright © 2019 Guillaume Le Vaillant <glv@posteo.net>
5 ;;;
6 ;;; This file is part of GNU Guix.
7 ;;;
8 ;;; GNU Guix is free software; you can redistribute it and/or modify it
9 ;;; under the terms of the GNU General Public License as published by
10 ;;; the Free Software Foundation; either version 3 of the License, or (at
11 ;;; your option) any later version.
12 ;;;
13 ;;; GNU Guix is distributed in the hope that it will be useful, but
14 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
15 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 ;;; GNU General Public License for more details.
17 ;;;
18 ;;; You should have received a copy of the GNU General Public License
19 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
20
21 (define-module (gnu build linux-boot)
22 #:use-module (rnrs io ports)
23 #:use-module (system repl error-handling)
24 #:autoload (system repl repl) (start-repl)
25 #:use-module (srfi srfi-1)
26 #:use-module (srfi srfi-9)
27 #:use-module (srfi srfi-26)
28 #:use-module (ice-9 match)
29 #:use-module (ice-9 rdelim)
30 #:use-module (ice-9 regex)
31 #:use-module (ice-9 ftw)
32 #:use-module (guix build utils)
33 #:use-module ((guix build syscalls)
34 #:hide (file-system-type))
35 #:use-module (gnu build linux-modules)
36 #:use-module (gnu build file-systems)
37 #:use-module (gnu system file-systems)
38 #:export (mount-essential-file-systems
39 linux-command-line
40 find-long-option
41 find-long-options
42 make-essential-device-nodes
43 make-static-device-nodes
44 configure-qemu-networking
45
46 device-number
47 boot-system))
48
49 ;;; Commentary:
50 ;;;
51 ;;; Utility procedures useful in a Linux initial RAM disk (initrd). Note that
52 ;;; many of these use procedures not yet available in vanilla Guile (`mount',
53 ;;; `load-linux-module', etc.); these are provided by a Guile patch used in
54 ;;; the GNU distribution.
55 ;;;
56 ;;; Code:
57
58 (define* (mount-essential-file-systems #:key (root "/"))
59 "Mount /dev, /proc, and /sys under ROOT."
60 (define (scope dir)
61 (string-append root
62 (if (string-suffix? "/" root)
63 ""
64 "/")
65 dir))
66
67 (unless (file-exists? (scope "proc"))
68 (mkdir (scope "proc")))
69 (mount "none" (scope "proc") "proc")
70
71 (unless (file-exists? (scope "dev"))
72 (mkdir (scope "dev")))
73 (mount "none" (scope "dev") "devtmpfs")
74
75 (unless (file-exists? (scope "sys"))
76 (mkdir (scope "sys")))
77 (mount "none" (scope "sys") "sysfs"))
78
79 (define (move-essential-file-systems root)
80 "Move currently mounted essential file systems to ROOT."
81 (for-each (lambda (dir)
82 (let ((target (string-append root dir)))
83 (unless (file-exists? target)
84 (mkdir target))
85 (mount dir target "" MS_MOVE)))
86 '("/dev" "/proc" "/sys")))
87
88 (define (linux-command-line)
89 "Return the Linux kernel command line as a list of strings."
90 (string-tokenize
91 (call-with-input-file "/proc/cmdline"
92 get-string-all)))
93
94 (define (find-long-option option arguments)
95 "Find OPTION among ARGUMENTS, where OPTION is something like \"--load\".
96 Return the value associated with OPTION, or #f on failure."
97 (let ((opt (string-append option "=")))
98 (and=> (find (cut string-prefix? opt <>)
99 arguments)
100 (lambda (arg)
101 (substring arg (+ 1 (string-index arg #\=)))))))
102
103 (define (find-long-options option arguments)
104 "Find OPTIONs among ARGUMENTS, where OPTION is something like \"console\".
105 Return the values associated with OPTIONs as a list, or the empty list if
106 OPTION doesn't appear in ARGUMENTS."
107 (let ((opt (string-append option "=")))
108 (filter-map (lambda (arg)
109 (and (string-prefix? opt arg)
110 (substring arg (+ 1 (string-index arg #\=)))))
111 arguments)))
112
113 (define* (make-disk-device-nodes base major #:optional (minor 0))
114 "Make the block device nodes around BASE (something like \"/root/dev/sda\")
115 with the given MAJOR number, starting with MINOR."
116 (mknod base 'block-special #o644 (device-number major minor))
117 (let loop ((i 1))
118 (when (< i 16)
119 (mknod (string-append base (number->string i))
120 'block-special #o644 (device-number major (+ minor i)))
121 (loop (+ i 1)))))
122
123 ;; Representation of a /dev node.
124 (define-record-type <device-node>
125 (device-node name type major minor module)
126 device-node?
127 (name device-node-name)
128 (type device-node-type)
129 (major device-node-major)
130 (minor device-node-minor)
131 (module device-node-module))
132
133 (define (read-static-device-nodes port)
134 "Read from PORT a list of <device-node> written in the format used by
135 /lib/modules/*/*.devname files."
136 (let loop ((line (read-line port)))
137 (if (eof-object? line)
138 '()
139 (match (string-split line #\space)
140 (((? (cut string-prefix? "#" <>)) _ ...)
141 (loop (read-line port)))
142 ((module-name device-name device-spec)
143 (let* ((device-parts
144 (string-match "([bc])([0-9][0-9]*):([0-9][0-9]*)"
145 device-spec))
146 (type-string (match:substring device-parts 1))
147 (type (match type-string
148 ("c" 'char-special)
149 ("b" 'block-special)))
150 (major-string (match:substring device-parts 2))
151 (major (string->number major-string 10))
152 (minor-string (match:substring device-parts 3))
153 (minor (string->number minor-string 10)))
154 (cons (device-node device-name type major minor module-name)
155 (loop (read-line port)))))
156 (_
157 (begin
158 (format (current-error-port)
159 "read-static-device-nodes: ignored devname line '~a'~%" line)
160 (loop (read-line port))))))))
161
162 (define* (mkdir-p* dir #:optional (mode #o755))
163 "This is a variant of 'mkdir-p' that works around
164 <http://bugs.gnu.org/24659> by passing MODE explicitly in each 'mkdir' call."
165 (define absolute?
166 (string-prefix? "/" dir))
167
168 (define not-slash
169 (char-set-complement (char-set #\/)))
170
171 (let loop ((components (string-tokenize dir not-slash))
172 (root (if absolute?
173 ""
174 ".")))
175 (match components
176 ((head tail ...)
177 (let ((path (string-append root "/" head)))
178 (catch 'system-error
179 (lambda ()
180 (mkdir path mode)
181 (loop tail path))
182 (lambda args
183 (if (= EEXIST (system-error-errno args))
184 (loop tail path)
185 (apply throw args))))))
186 (() #t))))
187
188 (define (report-system-error name . args)
189 "Report a system error for the file NAME."
190 (let ((errno (system-error-errno args)))
191 (format (current-error-port) "could not create '~a': ~a~%" name
192 (strerror errno))))
193
194 ;; Catch a system-error, log it and don't die from it.
195 (define-syntax-rule (catch-system-error name exp)
196 (catch 'system-error
197 (lambda ()
198 exp)
199 (lambda args
200 (apply report-system-error name args))))
201
202 ;; Create a device node like the <device-node> passed here on the file system.
203 (define create-device-node
204 (match-lambda
205 (($ <device-node> xname type major minor module)
206 (let ((name (string-append "/dev/" xname)))
207 (mkdir-p* (dirname name))
208 (catch-system-error name
209 (mknod name type #o600 (device-number major minor)))))))
210
211 (define* (make-static-device-nodes linux-release-module-directory)
212 "Create static device nodes required by the given Linux release.
213 This is required in order to solve a chicken-or-egg problem:
214 The Linux kernel has a feature to autoload modules when a device is first
215 accessed.
216 And udev has a feature to set the permissions of static nodes correctly
217 when it is starting up and also to automatically create nodes when hardware
218 is hotplugged. That leaves universal device files which are not linked to
219 one specific hardware device. These we have to create."
220 (let ((devname-name (string-append linux-release-module-directory "/"
221 "modules.devname")))
222 (for-each create-device-node
223 (call-with-input-file devname-name
224 read-static-device-nodes))))
225
226 (define* (make-essential-device-nodes #:optional (root "/"))
227 "Make essential device nodes under ROOT/dev."
228 ;; The hand-made devtmpfs/udev!
229
230 (define (scope dir)
231 (string-append root
232 (if (string-suffix? "/" root)
233 ""
234 "/")
235 dir))
236
237 (unless (file-exists? (scope "dev"))
238 (mkdir (scope "dev")))
239
240 ;; Make the device nodes for SCSI disks.
241 (make-disk-device-nodes (scope "dev/sda") 8)
242 (make-disk-device-nodes (scope "dev/sdb") 8 16)
243 (make-disk-device-nodes (scope "dev/sdc") 8 32)
244 (make-disk-device-nodes (scope "dev/sdd") 8 48)
245
246 ;; SCSI CD-ROM devices (aka. "/dev/sr0" etc.).
247 (mknod (scope "dev/scd0") 'block-special #o644 (device-number 11 0))
248 (mknod (scope "dev/scd1") 'block-special #o644 (device-number 11 1))
249
250 ;; The virtio (para-virtualized) block devices, as supported by QEMU/KVM.
251 (make-disk-device-nodes (scope "dev/vda") 252)
252
253 ;; Memory (used by Xorg's VESA driver.)
254 (mknod (scope "dev/mem") 'char-special #o640 (device-number 1 1))
255 (mknod (scope "dev/kmem") 'char-special #o640 (device-number 1 2))
256
257 ;; Inputs (used by Xorg.)
258 (unless (file-exists? (scope "dev/input"))
259 (mkdir (scope "dev/input")))
260 (mknod (scope "dev/input/mice") 'char-special #o640 (device-number 13 63))
261 (mknod (scope "dev/input/mouse0") 'char-special #o640 (device-number 13 32))
262 (mknod (scope "dev/input/event0") 'char-special #o640 (device-number 13 64))
263
264 ;; System console. This node is magically created by the kernel on the
265 ;; initrd's root, so don't try to create it in that case.
266 (unless (string=? root "/")
267 (mknod (scope "dev/console") 'char-special #o600
268 (device-number 5 1)))
269
270 ;; TTYs.
271 (mknod (scope "dev/tty") 'char-special #o600
272 (device-number 5 0))
273 (chmod (scope "dev/tty") #o666)
274 (let loop ((n 0))
275 (and (< n 50)
276 (let ((name (format #f "dev/tty~a" n)))
277 (mknod (scope name) 'char-special #o600
278 (device-number 4 n))
279 (loop (+ 1 n)))))
280
281 ;; Serial line.
282 (mknod (scope "dev/ttyS0") 'char-special #o660
283 (device-number 4 64))
284
285 ;; Pseudo ttys.
286 (mknod (scope "dev/ptmx") 'char-special #o666
287 (device-number 5 2))
288 (chmod (scope "dev/ptmx") #o666)
289
290 ;; Create /dev/pts; it will be mounted later, at boot time.
291 (unless (file-exists? (scope "dev/pts"))
292 (mkdir (scope "dev/pts")))
293
294 ;; Rendez-vous point for syslogd.
295 (mknod (scope "dev/log") 'socket #o666 0)
296 (mknod (scope "dev/kmsg") 'char-special #o600 (device-number 1 11))
297
298 ;; Other useful nodes, notably relied on by guix-daemon.
299 (for-each (match-lambda
300 ((file major minor)
301 (mknod (scope file) 'char-special #o666
302 (device-number major minor))
303 (chmod (scope file) #o666)))
304 '(("dev/null" 1 3)
305 ("dev/zero" 1 5)
306 ("dev/full" 1 7)
307 ("dev/random" 1 8)
308 ("dev/urandom" 1 9)))
309
310 (symlink "/proc/self/fd" (scope "dev/fd"))
311 (symlink "/proc/self/fd/0" (scope "dev/stdin"))
312 (symlink "/proc/self/fd/1" (scope "dev/stdout"))
313 (symlink "/proc/self/fd/2" (scope "dev/stderr"))
314
315 ;; Loopback devices.
316 (let loop ((i 0))
317 (when (< i 8)
318 (mknod (scope (string-append "dev/loop" (number->string i)))
319 'block-special #o660
320 (device-number 7 i))
321 (loop (+ 1 i))))
322
323 ;; File systems in user space (FUSE).
324 (mknod (scope "dev/fuse") 'char-special #o666 (device-number 10 229)))
325
326 (define %host-qemu-ipv4-address
327 (inet-pton AF_INET "10.0.2.10"))
328
329 (define* (configure-qemu-networking #:optional (interface "eth0"))
330 "Setup the INTERFACE network interface and /etc/resolv.conf according to
331 QEMU's default networking settings (see net/slirp.c in QEMU for default
332 networking values.) Return #t if INTERFACE is up, #f otherwise."
333 (display "configuring QEMU networking...\n")
334 (let* ((sock (socket AF_INET SOCK_STREAM 0))
335 (address (make-socket-address AF_INET %host-qemu-ipv4-address 0))
336 (flags (network-interface-flags sock interface)))
337 (set-network-interface-address sock interface address)
338 (set-network-interface-flags sock interface (logior flags IFF_UP))
339
340 ;; Hello! We used to create /etc/resolv.conf here, with "nameserver
341 ;; 10.0.2.3\n". However, with Linux-libre 3.16, we're getting ENOSPC.
342 ;; And since it's actually unnecessary, it's gone.
343
344 (logand (network-interface-flags sock interface) IFF_UP)))
345
346 (define (device-number major minor)
347 "Return the device number for the device with MAJOR and MINOR, for use as
348 the last argument of `mknod'."
349 (+ (* major 256) minor))
350
351 (define (pidof program)
352 "Return the PID of the first presumed instance of PROGRAM."
353 (let ((program (basename program)))
354 (find (lambda (pid)
355 (let ((exe (format #f "/proc/~a/exe" pid)))
356 (and=> (false-if-exception (readlink exe))
357 (compose (cut string=? program <>) basename))))
358 (filter-map string->number (scandir "/proc")))))
359
360 (define* (mount-root-file-system root type
361 #:key volatile-root? (flags 0) options)
362 "Mount the root file system of type TYPE at device ROOT. If VOLATILE-ROOT? is
363 true, mount ROOT read-only and make it an overlay with a writable tmpfs using
364 the kernel built-in overlayfs. FLAGS and OPTIONS indicates the options to use
365 to mount ROOT, and behave the same as for the `mount' procedure."
366
367 (if volatile-root?
368 (begin
369 (mkdir-p "/real-root")
370 (mount root "/real-root" type (logior MS_RDONLY flags) options)
371 (mkdir-p "/rw-root")
372 (mount "none" "/rw-root" "tmpfs")
373
374 ;; Create the upperdir and the workdir of the overlayfs
375 (mkdir-p "/rw-root/upper")
376 (mkdir-p "/rw-root/work")
377
378 ;; We want read-write /dev nodes.
379 (mkdir-p "/rw-root/upper/dev")
380 (mount "none" "/rw-root/upper/dev" "devtmpfs")
381
382 ;; Make /root an overlay of the tmpfs and the actual root.
383 (mount "none" "/root" "overlay" 0
384 "lowerdir=/real-root,upperdir=/rw-root/upper,workdir=/rw-root/work"))
385 (begin
386 (check-file-system root type)
387 (mount root "/root" type flags options)))
388
389 ;; Make sure /root/etc/mtab is a symlink to /proc/self/mounts.
390 (false-if-exception
391 (delete-file "/root/etc/mtab"))
392 (mkdir-p "/root/etc")
393 (symlink "/proc/self/mounts" "/root/etc/mtab"))
394
395 (define (switch-root root)
396 "Switch to ROOT as the root file system, in a way similar to what
397 util-linux' switch_root(8) does."
398 (move-essential-file-systems root)
399 (chdir root)
400
401 ;; Since we're about to 'rm -rf /', try to make sure we're on an initrd.
402 ;; TODO: Use 'statfs' to check the fs type, like klibc does.
403 (when (or (not (file-exists? "/init")) (directory-exists? "/home"))
404 (format (current-error-port)
405 "The root file system is probably not an initrd; \
406 bailing out.~%root contents: ~s~%" (scandir "/"))
407 (force-output (current-error-port))
408 (exit 1))
409
410 ;; Delete files from the old root, without crossing mount points (assuming
411 ;; there are no mount points in sub-directories.) That means we're leaving
412 ;; the empty ROOT directory behind us, but that's OK.
413 (let ((root-device (stat:dev (stat "/"))))
414 (for-each (lambda (file)
415 (unless (member file '("." ".."))
416 (let* ((file (string-append "/" file))
417 (device (stat:dev (lstat file))))
418 (when (= device root-device)
419 (delete-file-recursively file)))))
420 (scandir "/")))
421
422 ;; Make ROOT the new root.
423 (mount root "/" "" MS_MOVE)
424 (chroot ".")
425 (chdir "/")
426
427 (when (file-exists? "/dev/console")
428 ;; Close the standard file descriptors since they refer to the old
429 ;; /dev/console, and reopen them.
430 (let ((console (open-file "/dev/console" "r+b0")))
431 (for-each close-fdes '(0 1 2))
432
433 (dup2 (fileno console) 0)
434 (dup2 (fileno console) 1)
435 (dup2 (fileno console) 2)
436
437 (close-port console))))
438
439 \f
440 (define* (boot-system #:key
441 (linux-modules '())
442 linux-module-directory
443 keymap-file
444 qemu-guest-networking?
445 volatile-root?
446 pre-mount
447 (mounts '())
448 (on-error 'debug))
449 "This procedure is meant to be called from an initrd. Boot a system by
450 first loading LINUX-MODULES (a list of module names) from
451 LINUX-MODULE-DIRECTORY, then installing KEYMAP-FILE with 'loadkeys' (if
452 KEYMAP-FILE is true), then setting up QEMU guest networking if
453 QEMU-GUEST-NETWORKING? is true, calling PRE-MOUNT, mounting the file systems
454 specified in MOUNTS, and finally booting into the new root if any. The initrd
455 supports kernel command-line options '--load', '--root', and '--repl'.
456
457 Mount the root file system, specified by the '--root' command-line argument,
458 if any.
459
460 MOUNTS must be a list of <file-system> objects.
461
462 When VOLATILE-ROOT? is true, the root file system is writable but any changes
463 to it are lost.
464
465 ON-ERROR is passed to 'call-with-error-handling'; it determines what happens
466 upon error."
467 (define (root-mount-point? fs)
468 (string=? (file-system-mount-point fs) "/"))
469
470 (define (device-string->file-system-device device-string)
471 ;; The "--root=SPEC" kernel command-line option always provides a
472 ;; string, but the string can represent a device, a UUID, or a
473 ;; label. So check for all three.
474 (cond ((string-prefix? "/" device-string) device-string)
475 ((uuid device-string) => identity)
476 (else (file-system-label device-string))))
477
478 (display "Welcome, this is GNU's early boot Guile.\n")
479 (display "Use '--repl' for an initrd REPL.\n\n")
480
481 (call-with-error-handling
482 (lambda ()
483 (mount-essential-file-systems)
484 (let* ((args (linux-command-line))
485 (to-load (find-long-option "--load" args))
486 (root-fs (find root-mount-point? mounts))
487 (root-fs-type (or (and=> root-fs file-system-type)
488 "ext4"))
489 (root-fs-device (and=> root-fs file-system-device))
490 (root-fs-flags (mount-flags->bit-mask
491 (or (and=> root-fs file-system-flags)
492 '())))
493 (root-options (if root-fs
494 (file-system-options root-fs)
495 #f))
496 ;; --root takes precedence over the 'device' field of the root
497 ;; <file-system> record.
498 (root-device (or (and=> (find-long-option "--root" args)
499 device-string->file-system-device)
500 root-fs-device)))
501
502 (when (member "--repl" args)
503 (start-repl))
504
505 (display "loading kernel modules...\n")
506 (load-linux-modules-from-directory linux-modules
507 linux-module-directory)
508
509 (when keymap-file
510 (let ((status (system* "loadkeys" keymap-file)))
511 (unless (zero? status)
512 ;; Emit a warning rather than abort when we cannot load
513 ;; KEYMAP-FILE.
514 (format (current-error-port)
515 "warning: 'loadkeys' exited with status ~a~%"
516 status))))
517
518 (when qemu-guest-networking?
519 (unless (configure-qemu-networking)
520 (display "network interface is DOWN\n")))
521
522 ;; Prepare the real root file system under /root.
523 (unless (file-exists? "/root")
524 (mkdir "/root"))
525
526 (when (procedure? pre-mount)
527 ;; Do whatever actions are needed before mounting the root file
528 ;; system--e.g., installing device mappings. Error out when the
529 ;; return value is false.
530 (unless (pre-mount)
531 (error "pre-mount actions failed")))
532
533 (setenv "EXT2FS_NO_MTAB_OK" "1")
534
535 (if root-device
536 (mount-root-file-system (canonicalize-device-spec root-device)
537 root-fs-type
538 #:volatile-root? volatile-root?
539 #:flags root-fs-flags
540 #:options root-options)
541 (mount "none" "/root" "tmpfs"))
542
543 ;; Mount the specified file systems.
544 (for-each mount-file-system
545 (remove root-mount-point? mounts))
546
547 (setenv "EXT2FS_NO_MTAB_OK" #f)
548
549 (if to-load
550 (begin
551 (switch-root "/root")
552 (format #t "loading '~a'...\n" to-load)
553
554 (primitive-load to-load)
555
556 (format (current-error-port)
557 "boot program '~a' terminated, rebooting~%"
558 to-load)
559 (sleep 2)
560 (reboot))
561 (begin
562 (display "no boot file passed via '--load'\n")
563 (display "entering a warm and cozy REPL\n")
564 (start-repl)))))
565 #:on-error on-error))
566
567 ;;; linux-boot.scm ends here