gnu: Add fakechroot.
[jackhill/guix/guix.git] / gnu / packages / aux-files / run-in-namespace.c
CommitLineData
47a60325 1/* GNU Guix --- Functional package management for GNU
2520059b 2 Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
47a60325
LC
3
4 This file is part of GNU Guix.
5
6 GNU Guix is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or (at
9 your option) any later version.
10
11 GNU Guix is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */
18
19/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
20 mount namespace where the store is mounted in its right place.
21
22 We would happily do that in Scheme using 'call-with-container'. However,
23 this very program needs to be relocatable, so it needs to be statically
24 linked, which complicates things (Guile's modules can hardly be "linked"
25 into a single executable.) */
26
27#define _GNU_SOURCE
28#include <stdlib.h>
29#include <stdio.h>
30#include <unistd.h>
31#include <sched.h>
32#include <sys/mount.h>
33#include <errno.h>
34#include <libgen.h>
35#include <limits.h>
36#include <string.h>
37#include <assert.h>
38#include <sys/stat.h>
39#include <sys/types.h>
40#include <sys/wait.h>
41#include <fcntl.h>
42#include <dirent.h>
30da3173 43#include <sys/syscall.h>
47a60325 44
0cd13e72
LC
/* Location of the store that the wrapped program expects, "/gnu/store" by
   default.  */
static const char original_store[] = "@STORE_DIRECTORY@";
47
48
14928af2
LC
/* Like 'malloc', but never return NULL: print a diagnostic and abort if the
   allocation fails.  The check is explicit rather than an 'assert' so that it
   still takes effect when compiled with NDEBUG.  A SIZE of zero is rounded
   up to one byte so that a legitimate NULL from 'malloc (0)' is not mistaken
   for memory exhaustion.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size != 0 ? size : 1);

  if (result == NULL)
    {
      fputs ("memory exhausted\n", stderr);
      abort ();
    }

  return result;
}
57
47a60325
LC
/* Return a newly allocated string made of DIRECTORY, a slash, and FILE.
   Ownership passes to the caller, who must eventually 'free' it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* One extra byte for the separator and one for the terminating NUL.  */
  char *joined = xmalloc (directory_length + 2 + file_length);

  memcpy (joined, directory, directory_length);
  joined[directory_length] = '/';

  /* Copy FILE together with its terminating NUL.  */
  memcpy (joined + directory_length + 1, file, file_length + 1);

  return joined;
}
70
/* Create DIRECTORY along with any missing parents, like 'mkdir -p'.  New
   directories are created with mode 0700; components that already exist are
   left alone.  Any other 'mkdir' failure is reported via 'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* "/" ends the recursion for absolute names.  For a relative name,
     'dirname' eventually yields ".", and the parent of "." is "." again, so
     "." must end the recursion as well; otherwise a relative DIRECTORY would
     recurse until the stack overflows.  */
  if (strcmp (directory, "/") == 0 || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  if (mkdir (directory, 0700) < 0 && errno != EEXIST)
    assert_perror (errno);
}
83
/* Recursively delete DIRECTORY and everything below it, like 'rm -rf'.  A
   DIRECTORY that does not exist is not an error; other failures are reported
   via 'assert_perror'.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);

  /* 'opendir' returns NULL on failure and NULL must not be handed to
     'readdir' or 'closedir'.  In that case go straight to 'rmdir' below,
     which tolerates ENOENT and reports anything else.  */
  if (stream != NULL)
    {
      struct dirent *entry;

      while ((entry = readdir (stream)) != NULL)
        {
          if (strcmp (entry->d_name, ".") == 0
              || strcmp (entry->d_name, "..") == 0)
            continue;

          char *full = concat (directory, entry->d_name);

          if (unlink (full) < 0)
            {
              if (errno == EISDIR)
                /* Recurse (we expect a shallow directory structure so
                   there's little risk of stack overflow.)  */
                rm_rf (full);
              else
                assert_perror (errno);
            }

          free (full);
        }

      closedir (stream);
    }

  if (rmdir (directory) < 0 && errno != ENOENT)
    assert_perror (errno);
}
119
/* Populate TARGET with the top-level entries of SOURCE.  Symbolic links are
   re-created as symbolic links; for every other entry a mount point is
   created in TARGET and the entry is recursively bind-mounted onto it.  */
static void
bind_mount (const char *source, const char *target)
{
  DIR *stream = opendir (source);

  if (stream == NULL)
    {
      /* Nothing can be enumerated, and NULL must not reach 'readdir'.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Named so that it does not shadow the TARGET parameter.  */
          char link_target[PATH_MAX];

          /* 'readlink' does not NUL-terminate, hence the reserved byte.  */
          ssize_t length = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (length > 0)
            {
              link_target[length] = '\0';
              if (symlink (link_target, new_entry) < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          if (entry->d_type == DT_DIR)
            {
              if (mkdir (new_entry, 0700) != 0)
                assert_perror (errno);
            }
          else
            {
              /* 'open' requires a mode argument whenever O_CREAT is given.
                 Failure to create the file is not checked here; it shows up
                 as a 'mount' failure below.  */
              int fd = open (new_entry, O_WRONLY | O_CREAT, 0600);
              if (fd >= 0)
                close (fd);
            }

          int err = mount (abs_source, new_entry, "none",
                           MS_BIND | MS_REC | MS_RDONLY, NULL);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.

             NOTE(review): as written, the test below aborts when a
             *non-directory* fails to mount and ignores failures for
             directories, which is the opposite of what the paragraph above
             describes.  The condition is deliberately left unchanged here;
             confirm the intent before flipping it.  */
          if (err != 0 && entry->d_type != DT_DIR)
            assert_perror (errno);
        }

      /* Release the names on both branches, symlinks included.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
183
30da3173
LC
/* Write to /proc/PID/FILE a single-entry ID map that maps ID to itself.
   FILE is the map's name under /proc/PID, e.g. "uid_map" or "gid_map".  See
   user_namespaces(7).  Failures are reported via 'assert_perror'.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char id_map_file[100];
  snprintf (id_map_file, sizeof id_map_file, "/proc/%d/%s", (int) pid, file);

  /* ID arrives as an 'int', but user and group IDs are unsigned.  Format it
     as unsigned so that an ID above INT_MAX is not written as a negative
     number.  The map has one line: "ID ID 1".  */
  char id_map[100];
  int len = snprintf (id_map, sizeof id_map, "%u %u 1\n",
                      (unsigned int) id, (unsigned int) id);

  int fd = open (id_map_file, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  ssize_t n = write (fd, id_map, len);
  if (n < 0)
    assert_perror (errno);

  close (fd);
}
206
/* Forbid setgroups(2) for process PID by writing "deny" to
   /proc/PID/setgroups.  Failures are reported via 'assert_perror'.  */
static void
disallow_setgroups (pid_t pid)
{
  /* The terminating NUL is written along with the word itself.  */
  static const char verdict[] = "deny";
  char path[100];

  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  int written = write (fd, verdict, sizeof verdict);
  if (written < 0)
    assert_perror (errno);

  close (fd);
}
225
bdb9b4e8
LC
226/* Run the wrapper program in a separate mount user namespace. Return only
227 upon failure. */
228static void
229exec_in_user_namespace (const char *store, int argc, char *argv[])
230{
231 /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
232 bind-mounted in the right place. */
233 int err;
234 char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
0cd13e72 235 char *new_store = concat (new_root, original_store);
bdb9b4e8
LC
236 char *cwd = get_current_dir_name ();
237
238 /* Create a child with separate namespaces and set up bind-mounts from
239 there. That way, bind-mounts automatically disappear when the child
240 exits, which simplifies cleanup for the parent. Note: clone is more
241 convenient than fork + unshare since the parent can directly write
242 the child uid_map/gid_map files. */
243 pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
244 NULL, NULL, NULL);
245 switch (child)
246 {
247 case 0:
248 /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
249 we cannot make NEW_ROOT a tmpfs (which would have saved the need
250 for 'rm_rf'.) */
251 bind_mount ("/", new_root);
252 mkdir_p (new_store);
253 err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
254 NULL);
255 if (err < 0)
256 assert_perror (errno);
257
258 chdir (new_root);
259 err = chroot (new_root);
260 if (err < 0)
261 assert_perror (errno);
262
263 /* Change back to where we were before chroot'ing. */
264 chdir (cwd);
265
266 int err = execv ("@WRAPPED_PROGRAM@", argv);
267 if (err < 0)
268 assert_perror (errno);
269 break;
270
271 case -1:
272 /* Failure: user namespaces not supported. */
273 fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);
274 rm_rf (new_root);
275 break;
276
277 default:
278 {
279 /* Map the current user/group ID in the child's namespace (the
280 default is to get the "overflow UID", i.e., the UID of
281 "nobody"). We must first disallow 'setgroups' for that
282 process. */
283 disallow_setgroups (child);
284 write_id_map (child, "uid_map", getuid ());
285 write_id_map (child, "gid_map", getgid ());
286
287 int status;
288 waitpid (child, &status, 0);
289 chdir ("/"); /* avoid EBUSY */
290 rm_rf (new_root);
291 free (new_root);
292
293 if (WIFEXITED (status))
294 exit (WEXITSTATUS (status));
295 else
296 /* Abnormal termination cannot really be reproduced, so exit
297 with 255. */
298 exit (255);
299 }
300 }
301}
302
47a60325 303\f
99aec37a
LC
304#ifdef PROOT_PROGRAM
305
306/* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and
307 "bind-mounting" STORE in the right place. */
308static void
309exec_with_proot (const char *store, int argc, char *argv[])
310{
311 int proot_specific_argc = 4;
312 int proot_argc = argc + proot_specific_argc;
4a53c19a 313 char *proot_argv[proot_argc + 1], *proot;
0cd13e72 314 char bind_spec[strlen (store) + 1 + sizeof original_store];
99aec37a
LC
315
316 strcpy (bind_spec, store);
317 strcat (bind_spec, ":");
0cd13e72 318 strcat (bind_spec, original_store);
99aec37a
LC
319
320 proot = concat (store, PROOT_PROGRAM);
321
322 proot_argv[0] = proot;
323 proot_argv[1] = "-b";
324 proot_argv[2] = bind_spec;
325 proot_argv[3] = "@WRAPPED_PROGRAM@";
326
327 for (int i = 0; i < argc; i++)
328 proot_argv[i + proot_specific_argc] = argv[i + 1];
329
330 proot_argv[proot_argc] = NULL;
331
332 /* Seccomp support seems to invariably lead to segfaults; disable it by
333 default. */
334 setenv ("PROOT_NO_SECCOMP", "1", 0);
335
336 int err = execv (proot, proot_argv);
337 if (err < 0)
338 assert_perror (errno);
339}
340
341#endif
342
343\f
fde2aec3
LC
344/* Execution engines. */
345
/* An execution engine: a named way of running the wrapped program.  */
struct engine
{
  /* Name under which the engine is looked up.  */
  const char *name;

  /* Entry point, called with the store location, ARGC, and ARGV.  */
  void (*exec) (const char *store, int argc, char **argv);
};
351
/* Make stderr fully buffered, backed by a static 4096-byte buffer.  */
static void
buffer_stderr (void)
{
  enum { STDERR_BUFFER_SIZE = 4096 };
  static char storage[STDERR_BUFFER_SIZE];

  setvbuf (stderr, storage, _IOFBF, STDERR_BUFFER_SIZE);
}
358
359/* The default engine. */
360static void
361exec_default (const char *store, int argc, char *argv[])
362{
363 /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace'
364 fails but 'exec_with_proot' works. */
365 buffer_stderr ();
366
367 exec_in_user_namespace (store, argc, argv);
368#ifdef PROOT_PROGRAM
369 exec_with_proot (store, argc, argv);
370#endif
371}
372
373/* List of supported engines. */
374static const struct engine engines[] =
375 {
376 { "default", exec_default },
377 { "userns", exec_in_user_namespace },
378#ifdef PROOT_PROGRAM
379 { "proot", exec_with_proot },
380#endif
381 { NULL, NULL }
382 };
383
384/* Return the "execution engine" to use. */
385static const struct engine *
386execution_engine (void)
387{
388 const char *str = getenv ("GUIX_EXECUTION_ENGINE");
389
390 if (str == NULL)
391 str = "default";
392
393 try:
394 for (const struct engine *engine = engines;
395 engine->name != NULL;
396 engine++)
397 {
398 if (strcmp (engine->name, str) == 0)
399 return engine;
400 }
401
402 fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n",
403 str);
404 str = "default";
405 goto try;
406}
407
408\f
47a60325
LC
409int
410main (int argc, char *argv[])
411{
412 ssize_t size;
413 char self[PATH_MAX];
414 size = readlink ("/proc/self/exe", self, sizeof self - 1);
415 assert (size > 0);
416
417 /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
418 want to extract "/home/ludo/.local/gnu/store". */
419 size_t index = strlen (self)
0cd13e72 420 - strlen ("@WRAPPED_PROGRAM@") + strlen (original_store);
47a60325
LC
421 char *store = strdup (self);
422 store[index] = '\0';
423
424 struct stat statbuf;
425
426 /* If STORE is already at the "right" place, we can execute
427 @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's
428 needed when running one of these wrappers from within an unshare'd
429 namespace, because 'unshare' fails with EPERM in that context. */
0cd13e72 430 if (strcmp (store, original_store) != 0
47a60325
LC
431 && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0)
432 {
fde2aec3
LC
433 const struct engine *engine = execution_engine ();
434 engine->exec (store, argc, argv);
47a60325 435
fde2aec3
LC
436 /* If we reach this point, that's because ENGINE failed to do the
437 job. */
bdb9b4e8 438 fprintf (stderr, "\
30da3173
LC
439This may be because \"user namespaces\" are not supported on this system.\n\
440Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
441unless you move it to the '@STORE_DIRECTORY@' directory.\n\
442\n\
443Please refer to the 'guix pack' documentation for more information.\n");
bdb9b4e8 444 return EXIT_FAILURE;
47a60325
LC
445 }
446
447 /* The executable is available under @STORE_DIRECTORY@, so we can now
448 execute it. */
449 int err = execv ("@WRAPPED_PROGRAM@", argv);
450 if (err < 0)
451 assert_perror (errno);
452
453 return EXIT_FAILURE;
454}