1 ;;; GNU Guix --- Functional package management for GNU
2 ;;; Copyright © 2013, 2014 Ludovic Courtès <ludo@gnu.org>
4 ;;; This file is part of GNU Guix.
6 ;;; GNU Guix is free software; you can redistribute it and/or modify it
7 ;;; under the terms of the GNU General Public License as published by
8 ;;; the Free Software Foundation; either version 3 of the License, or (at
9 ;;; your option) any later version.
11 ;;; GNU Guix is distributed in the hope that it will be useful, but
12 ;;; WITHOUT ANY WARRANTY; without even the implied warranty of
13 ;;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 ;;; GNU General Public License for more details.
16 ;;; You should have received a copy of the GNU General Public License
17 ;;; along with GNU Guix. If not, see <http://www.gnu.org/licenses/>.
19 (define-module (guix build linux-initrd)
20 #:use-module (rnrs io ports)
21 #:use-module (rnrs bytevectors)
22 #:use-module (system foreign)
23 #:use-module (system repl error-handling)
24 #:autoload (system repl repl) (start-repl)
25 #:autoload (system base compile) (compile-file)
26 #:use-module (srfi srfi-1)
27 #:use-module (srfi srfi-26)
28 #:use-module (ice-9 match)
29 #:use-module (ice-9 rdelim)
30 #:use-module (ice-9 ftw)
31 #:use-module (guix build utils)
32 #:export (mount-essential-file-systems
35 make-essential-device-nodes
36 configure-qemu-networking
39 partition-label-predicate
40 find-partition-by-label
41 canonicalize-device-spec
54 ;;; Utility procedures useful in a Linux initial RAM disk (initrd). Note that
55 ;;; many of these use procedures not yet available in vanilla Guile (`mount',
56 ;;; `load-linux-module', etc.); these are provided by a Guile patch used in
57 ;;; the GNU distribution.
61 (define* (mount-essential-file-systems #:key (root "/"))
62 "Mount /proc and /sys under ROOT."
65 (if (string-suffix? "/" root)
70 (unless (file-exists? (scope "proc"))
71 (mkdir (scope "proc")))
72 (mount "none" (scope "proc") "proc")
74 (unless (file-exists? (scope "sys"))
75 (mkdir (scope "sys")))
76 (mount "none" (scope "sys") "sysfs"))
78 (define (move-essential-file-systems root)
79 "Move currently mounted essential file systems to ROOT."
80 (for-each (lambda (dir)
81 (let ((target (string-append root dir)))
82 (unless (file-exists? target)
84 (mount dir target "" MS_MOVE)))
87 (define (linux-command-line)
88 "Return the Linux kernel command line as a list of strings."
90 (call-with-input-file "/proc/cmdline"
93 (define (find-long-option option arguments)
94 "Find OPTION among ARGUMENTS, where OPTION is something like \"--load\".
95 Return the value associated with OPTION, or #f on failure."
96 (let ((opt (string-append option "=")))
97 (and=> (find (cut string-prefix? opt <>)
100 (substring arg (+ 1 (string-index arg #\=)))))))
102 (define-syntax %ext2-endianness
103 ;; Endianness of ext2 file systems.
104 (identifier-syntax (endianness little)))
106 ;; Offset in bytes of interesting parts of an ext2 superblock. See
107 ;; <http://www.nongnu.org/ext2-doc/ext2.html#DEF-SUPERBLOCK>.
108 ;; TODO: Use "packed structs" from Guile-OpenGL or similar.
109 (define-syntax %ext2-sblock-magic (identifier-syntax 56))
110 (define-syntax %ext2-sblock-creator-os (identifier-syntax 72))
111 (define-syntax %ext2-sblock-uuid (identifier-syntax 104))
112 (define-syntax %ext2-sblock-volume-name (identifier-syntax 120))
114 (define (read-ext2-superblock device)
115 "Return the raw contents of DEVICE's ext2 superblock as a bytevector, or #f
116 if DEVICE does not contain an ext2 file system."
118 ;; The magic bytes that identify an ext2 file system.
121 (define superblock-size
122 ;; Size of the interesting part of an ext2 superblock.
126 ;; The superblock contents.
127 (make-bytevector superblock-size))
129 (call-with-input-file device
131 (seek port 1024 SEEK_SET)
133 ;; Note: work around <http://bugs.gnu.org/17466>.
134 (and (eqv? superblock-size (get-bytevector-n! port block 0
136 (let ((magic (bytevector-u16-ref block %ext2-sblock-magic
138 (and (= magic %ext2-magic)
141 (define (ext2-superblock-uuid sblock)
142 "Return the UUID of ext2 superblock SBLOCK as a 16-byte bytevector."
143 (let ((uuid (make-bytevector 16)))
144 (bytevector-copy! sblock %ext2-sblock-uuid uuid 0 16)
147 (define (ext2-superblock-volume-name sblock)
148 "Return the volume name of SBLOCK as a string of at most 16 characters, or
149 #f if SBLOCK has no volume name."
150 (let ((bv (make-bytevector 16)))
151 (bytevector-copy! sblock %ext2-sblock-volume-name bv 0 16)
153 ;; This is a Latin-1, nul-terminated string.
154 (let ((bytes (take-while (negate zero?) (bytevector->u8-list bv))))
157 (list->string (map integer->char bytes))))))
159 (define (disk-partitions)
160 "Return the list of device names corresponding to valid disk partitions."
161 (define (partition? major minor)
162 (let ((marker (format #f "/sys/dev/block/~a:~a/partition" major minor)))
165 (not (zero? (call-with-input-file marker read))))
167 (if (= ENOENT (system-error-errno args))
169 (apply throw args))))))
171 (call-with-input-file "/proc/partitions"
173 ;; Skip the two header lines.
177 ;; Read each subsequent line, and extract the last space-separated
179 (let loop ((parts '()))
180 (let ((line (read-line port)))
181 (if (eof-object? line)
183 (match (string-tokenize line)
184 (((= string->number major) (= string->number minor)
186 (if (partition? major minor)
187 (loop (cons name parts))
188 (loop parts))))))))))
190 (define (partition-label-predicate label)
191 "Return a procedure that, when applied to a partition name such as \"sda1\",
192 return #t if that partition's volume name is LABEL."
194 (let* ((device (string-append "/dev/" part))
195 (sblock (catch 'system-error
197 (read-ext2-superblock device))
199 ;; When running on the hand-made /dev,
200 ;; 'disk-partitions' could return partitions for which
201 ;; we have no /dev node. Handle that gracefully.
202 (if (= ENOENT (system-error-errno args))
204 (format (current-error-port)
205 "warning: device '~a' not found~%"
208 (apply throw args))))))
210 (let ((volume (ext2-superblock-volume-name sblock)))
212 (string=? volume label)))))))
214 (define (find-partition-by-label label)
215 "Return the first partition found whose volume name is LABEL, or #f if none
217 (and=> (find (partition-label-predicate label)
219 (cut string-append "/dev/" <>)))
221 (define* (canonicalize-device-spec spec #:optional (title 'any))
222 "Return the device name corresponding to SPEC. TITLE is a symbol, one of
225 • 'device', in which case SPEC is known to designate a device node--e.g.,
227 • 'label', in which case SPEC is known to designate a partition label--e.g.,
229 • 'any', in which case SPEC can be anything.
232 ;; Number of times we retry partition label resolution.
235 (define canonical-title
236 ;; The realm of canonicalization.
238 (if (string-prefix? "/" spec)
243 (case canonical-title
248 ;; Resolve the label.
249 (let loop ((count 0))
250 (let ((device (find-partition-by-label spec)))
252 ;; Some devices take a bit of time to appear, most notably USB
253 ;; storage devices. Thus, wait for the device to appear.
254 (if (> count max-trials)
255 (error "failed to resolve partition label" spec)
258 (loop (+ 1 count))))))))
259 ;; TODO: Add support for UUIDs.
261 (error "unknown device title" title))))
263 (define* (make-disk-device-nodes base major #:optional (minor 0))
264 "Make the block device nodes around BASE (something like \"/root/dev/sda\")
265 with the given MAJOR number, starting with MINOR."
266 (mknod base 'block-special #o644 (device-number major minor))
269 (mknod (string-append base (number->string i))
270 'block-special #o644 (device-number major (+ minor i)))
273 (define* (make-essential-device-nodes #:key (root "/"))
274 "Make essential device nodes under ROOT/dev."
275 ;; The hand-made udev!
279 (if (string-suffix? "/" root)
284 (unless (file-exists? (scope "dev"))
285 (mkdir (scope "dev")))
287 ;; Make the device nodes for SCSI disks.
288 (make-disk-device-nodes (scope "dev/sda") 8)
289 (make-disk-device-nodes (scope "dev/sdb") 8 16)
290 (make-disk-device-nodes (scope "dev/sdc") 8 32)
291 (make-disk-device-nodes (scope "dev/sdd") 8 48)
293 ;; SCSI CD-ROM devices (aka. "/dev/sr0" etc.).
294 (mknod (scope "dev/scd0") 'block-special #o644 (device-number 11 0))
295 (mknod (scope "dev/scd1") 'block-special #o644 (device-number 11 1))
297 ;; The virtio (para-virtualized) block devices, as supported by QEMU/KVM.
298 (make-disk-device-nodes (scope "dev/vda") 252)
300 ;; Memory (used by Xorg's VESA driver.)
301 (mknod (scope "dev/mem") 'char-special #o640 (device-number 1 1))
302 (mknod (scope "dev/kmem") 'char-special #o640 (device-number 1 2))
304 ;; Inputs (used by Xorg.)
305 (unless (file-exists? (scope "dev/input"))
306 (mkdir (scope "dev/input")))
307 (mknod (scope "dev/input/mice") 'char-special #o640 (device-number 13 63))
308 (mknod (scope "dev/input/mouse0") 'char-special #o640 (device-number 13 32))
309 (mknod (scope "dev/input/event0") 'char-special #o640 (device-number 13 64))
311 ;; System console. This node is magically created by the kernel on the
312 ;; initrd's root, so don't try to create it in that case.
313 (unless (string=? root "/")
314 (mknod (scope "dev/console") 'char-special #o600
315 (device-number 5 1)))
318 (mknod (scope "dev/tty") 'char-special #o600
320 (chmod (scope "dev/tty") #o666)
323 (let ((name (format #f "dev/tty~a" n)))
324 (mknod (scope name) 'char-special #o600
329 (mknod (scope "dev/ttyS0") 'char-special #o660
330 (device-number 4 64))
333 (mknod (scope "dev/ptmx") 'char-special #o666
335 (chmod (scope "dev/ptmx") #o666)
337 ;; Create /dev/pts; it will be mounted later, at boot time.
338 (unless (file-exists? (scope "dev/pts"))
339 (mkdir (scope "dev/pts")))
341 ;; Rendez-vous point for syslogd.
342 (mknod (scope "dev/log") 'socket #o666 0)
343 (mknod (scope "dev/kmsg") 'char-special #o600 (device-number 1 11))
345 ;; Other useful nodes, notably relied on by guix-daemon.
346 (for-each (match-lambda
348 (mknod (scope file) 'char-special #o666
349 (device-number major minor))
350 (chmod (scope file) #o666)))
355 ("dev/urandom" 1 9)))
357 (symlink "/proc/self/fd" (scope "dev/fd"))
358 (symlink "/proc/self/fd/0" (scope "dev/stdin"))
359 (symlink "/proc/self/fd/1" (scope "dev/stdout"))
360 (symlink "/proc/self/fd/2" (scope "dev/stderr"))
365 (mknod (scope (string-append "dev/loop" (number->string i)))
370 ;; File systems in user space (FUSE).
371 (mknod (scope "dev/fuse") 'char-special #o666 (device-number 10 229)))
373 (define %host-qemu-ipv4-address
374 (inet-pton AF_INET "10.0.2.10"))
376 (define* (configure-qemu-networking #:optional (interface "eth0"))
377 "Setup the INTERFACE network interface and /etc/resolv.conf according to
378 QEMU's default networking settings (see net/slirp.c in QEMU for default
379 networking values.) Return #t if INTERFACE is up, #f otherwise."
380 (display "configuring QEMU networking...\n")
381 (let* ((sock (socket AF_INET SOCK_STREAM 0))
382 (address (make-socket-address AF_INET %host-qemu-ipv4-address 0))
383 (flags (network-interface-flags sock interface)))
384 (set-network-interface-address sock interface address)
385 (set-network-interface-flags sock interface (logior flags IFF_UP))
387 (unless (file-exists? "/etc")
389 (call-with-output-file "/etc/resolv.conf"
391 (display "nameserver 10.0.2.3\n" p)))
393 (logand (network-interface-flags sock interface) IFF_UP)))
395 ;; Linux mount flags, from libc's <sys/mount.h>.
400 (define MS_BIND 4096)
401 (define MS_MOVE 8192)
403 (define (bind-mount source target)
404 "Bind-mount SOURCE at TARGET."
405 (mount source target "" MS_BIND))
407 (define (load-linux-module* file)
408 "Load Linux module from FILE, the name of a `.ko' file."
409 (define (slurp module)
410 (call-with-input-file file get-bytevector-all))
412 (load-linux-module (slurp file)))
414 (define (device-number major minor)
415 "Return the device number for the device with MAJOR and MINOR, for use as
416 the last argument of `mknod'."
417 (+ (* major 256) minor))
419 (define (pidof program)
420 "Return the PID of the first presumed instance of PROGRAM."
421 (let ((program (basename program)))
423 (let ((exe (format #f "/proc/~a/exe" pid)))
424 (and=> (false-if-exception (readlink exe))
425 (compose (cut string=? program <>) basename))))
426 (filter-map string->number (scandir "/proc")))))
428 (define* (mount-root-file-system root type
429 #:key volatile-root? (unionfs "unionfs"))
430 "Mount the root file system of type TYPE at device ROOT. If VOLATILE-ROOT?
431 is true, mount ROOT read-only and make it a union with a writable tmpfs using
433 (define (mark-as-not-killable pid)
434 ;; Tell the 'user-processes' dmd service that PID must be kept alive when
436 (mkdir-p "/root/etc/dmd")
437 (let ((port (open-file "/root/etc/dmd/do-not-kill" "a")))
447 (mkdir-p "/real-root")
448 (mount root "/real-root" type MS_RDONLY)
450 (mount "none" "/rw-root" "tmpfs")
452 ;; We want read-write /dev nodes.
453 (make-essential-device-nodes #:root "/rw-root")
455 ;; Make /root a union of the tmpfs and the actual root. Use
456 ;; 'max_files' to set a high RLIMIT_NOFILE for the unionfs process
457 ;; itself. Failing to do that, we quickly run out of file
458 ;; descriptors; see <http://bugs.gnu.org/17827>.
459 (unless (zero? (system* unionfs "-o"
460 "cow,allow_other,use_ino,suid,dev,max_files=65536"
461 "/rw-root=RW:/real-root=RO"
463 (error "unionfs failed"))
465 ;; Make sure unionfs remains alive till the end. Because
466 ;; 'fuse_daemonize' doesn't tell the PID of the forked daemon, we
467 ;; have to resort to 'pidof' here.
468 (mark-as-not-killable (pidof unionfs)))
470 (check-file-system root type)
471 (mount root "/root" type))))
473 (format (current-error-port) "exception while mounting '~a': ~s~%"
477 (copy-file "/proc/mounts" "/root/etc/mtab"))
479 (define (check-file-system device type)
480 "Run a file system check of TYPE on DEVICE."
482 (string-append "fsck." type))
484 (let ((status (system* fsck "-v" "-p" device)))
485 (match (status:exit-val status)
489 (format (current-error-port) "'~a' corrected errors on ~a; continuing~%"
492 (format (current-error-port) "'~a' corrected errors on ~a; rebooting~%"
497 (format (current-error-port) "'~a' exited with code ~a on ~a; spawning REPL~%"
501 (define (mount-flags->bit-mask flags)
502 "Return the number suitable for the 'flags' argument of 'mount' that
503 corresponds to the symbols listed in FLAGS."
504 (let loop ((flags flags))
506 (('read-only rest ...)
507 (logior MS_RDONLY (loop rest)))
508 (('bind-mount rest ...)
509 (logior MS_BIND (loop rest)))
511 (logior MS_NOSUID (loop rest)))
513 (logior MS_NODEV (loop rest)))
515 (logior MS_NOEXEC (loop rest)))
519 (define* (mount-file-system spec #:key (root "/root"))
520 "Mount the file system described by SPEC under ROOT. SPEC must have the
523 (DEVICE TITLE MOUNT-POINT TYPE (FLAGS ...) OPTIONS CHECK?)
525 DEVICE, MOUNT-POINT, and TYPE must be strings; OPTIONS can be a string or #f;
526 FLAGS must be a list of symbols. CHECK? is a Boolean indicating whether to
527 run a file system check."
529 ((source title mount-point type (flags ...) options check?)
530 (let ((source (canonicalize-device-spec source title))
531 (mount-point (string-append root "/" mount-point)))
533 (check-file-system source type))
534 (mkdir-p mount-point)
535 (mount source mount-point type (mount-flags->bit-mask flags)
537 (string->pointer options)
541 (mkdir-p (string-append root "/etc"))
542 (let ((port (open-file (string-append root "/etc/mtab") "a")))
543 (format port "~a ~a ~a ~a 0 0~%"
544 source mount-point type (or options ""))
545 (close-port port))))))
547 (define (switch-root root)
548 "Switch to ROOT as the root file system, in a way similar to what
549 util-linux' switch_root(8) does."
550 (move-essential-file-systems root)
553 ;; Since we're about to 'rm -rf /', try to make sure we're on an initrd.
554 ;; TODO: Use 'statfs' to check the fs type, like klibc does.
555 (when (or (not (file-exists? "/init")) (directory-exists? "/home"))
556 (format (current-error-port)
557 "The root file system is probably not an initrd; \
558 bailing out.~%root contents: ~s~%" (scandir "/"))
559 (force-output (current-error-port))
562 ;; Delete files from the old root, without crossing mount points (assuming
563 ;; there are no mount points in sub-directories.) That means we're leaving
564 ;; the empty ROOT directory behind us, but that's OK.
565 (let ((root-device (stat:dev (stat "/"))))
566 (for-each (lambda (file)
567 (unless (member file '("." ".."))
568 (let* ((file (string-append "/" file))
569 (device (stat:dev (lstat file))))
570 (when (= device root-device)
571 (delete-file-recursively file)))))
574 ;; Make ROOT the new root.
575 (mount root "/" "" MS_MOVE)
579 (when (file-exists? "/dev/console")
580 ;; Close the standard file descriptors since they refer to the old
581 ;; /dev/console, and reopen them.
582 (let ((console (open-file "/dev/console" "r+b0")))
583 (for-each close-fdes '(0 1 2))
585 (dup2 (fileno console) 0)
586 (dup2 (fileno console) 1)
587 (dup2 (fileno console) 2)
589 (close-port console))))
592 (define* (boot-system #:key
594 qemu-guest-networking?
595 guile-modules-in-chroot?
598 "This procedure is meant to be called from an initrd. Boot a system by
599 first loading LINUX-MODULES, then setting up QEMU guest networking if
600 QEMU-GUEST-NETWORKING? is true, mounting the file systems specified in MOUNTS,
601 and finally booting into the new root if any. The initrd supports kernel
602 command-line options '--load', '--root', and '--repl'.
604 Mount the root file system, specified by the '--root' command-line argument,
607 MOUNTS must be a list suitable for 'mount-file-system'.
609 When GUILE-MODULES-IN-CHROOT? is true, make core Guile modules available in
612 When VOLATILE-ROOT? is true, the root file system is writable but any changes
614 (define root-mount-point?
616 ((device _ "/" _ ...) #t)
620 (or (any (match-lambda
621 ((device _ "/" type _ ...) type)
626 (display "Welcome, this is GNU's early boot Guile.\n")
627 (display "Use '--repl' for an initrd REPL.\n\n")
629 (call-with-error-handling
631 (mount-essential-file-systems)
632 (let* ((args (linux-command-line))
633 (to-load (find-long-option "--load" args))
634 (root (find-long-option "--root" args)))
636 (when (member "--repl" args)
639 (display "loading kernel modules...\n")
640 (for-each (compose load-linux-module*
641 (cut string-append "/modules/" <>))
644 (when qemu-guest-networking?
645 (unless (configure-qemu-networking)
646 (display "network interface is DOWN\n")))
649 (make-essential-device-nodes)
651 ;; Prepare the real root file system under /root.
652 (unless (file-exists? "/root")
655 (mount-root-file-system (canonicalize-device-spec root)
657 #:volatile-root? volatile-root?)
658 (mount "none" "/root" "tmpfs"))
660 (unless (file-exists? "/root/dev")
662 (make-essential-device-nodes #:root "/root"))
664 ;; Mount the specified file systems.
665 (for-each mount-file-system
666 (remove root-mount-point? mounts))
668 (when guile-modules-in-chroot?
669 ;; Copy the directories that contain .scm and .go files so that the
670 ;; child process in the chroot can load modules (we would bind-mount
671 ;; them but for some reason that fails with EINVAL -- XXX).
672 (mkdir-p "/root/share")
673 (mkdir-p "/root/lib")
674 (mount "none" "/root/share" "tmpfs")
675 (mount "none" "/root/lib" "tmpfs")
676 (copy-recursively "/share" "/root/share"
677 #:log (%make-void-port "w"))
678 (copy-recursively "/lib" "/root/lib"
679 #:log (%make-void-port "w")))
683 (switch-root "/root")
684 (format #t "loading '~a'...\n" to-load)
686 ;; TODO: Remove /lib, /share, and /loader.go.
687 (primitive-load to-load)
689 (format (current-error-port)
690 "boot program '~a' terminated, rebooting~%"
695 (display "no boot file passed via '--load'\n")
696 (display "entering a warm and cozy REPL\n")
699 ;;; linux-initrd.scm ends here