pack: Wrapper honors 'GUIX_EXECUTION_ENGINE' environment variable.
[jackhill/guix/guix.git] / gnu / packages / aux-files / run-in-namespace.c
CommitLineData
47a60325 1/* GNU Guix --- Functional package management for GNU
2520059b 2 Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
47a60325
LC
3
4 This file is part of GNU Guix.
5
6 GNU Guix is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or (at
9 your option) any later version.
10
11 GNU Guix is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */
18
19/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
20 mount namespace where the store is mounted in its right place.
21
22 We would happily do that in Scheme using 'call-with-container'. However,
23 this very program needs to be relocatable, so it needs to be statically
24 linked, which complicates things (Guile's modules can hardly be "linked"
25 into a single executable.) */
26
27#define _GNU_SOURCE
28#include <stdlib.h>
29#include <stdio.h>
30#include <unistd.h>
31#include <sched.h>
32#include <sys/mount.h>
33#include <errno.h>
34#include <libgen.h>
35#include <limits.h>
36#include <string.h>
37#include <assert.h>
38#include <sys/stat.h>
39#include <sys/types.h>
40#include <sys/wait.h>
41#include <fcntl.h>
42#include <dirent.h>
30da3173 43#include <sys/syscall.h>
47a60325 44
14928af2
LC
/* Allocate SIZE bytes with 'malloc' and return the new block.  Allocation
   failure is treated as fatal: the result is checked with 'assert'.  */
static void *
xmalloc (size_t size)
{
  void *block;

  block = malloc (size);
  assert (block != NULL);

  return block;
}
53
47a60325
LC
/* Return a newly allocated string made of DIRECTORY, a slash, and FILE, in
   that order.  The caller owns the result and must eventually 'free' it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* Room for both components, the separating slash, and the trailing
     NUL.  */
  char *joined = xmalloc (directory_length + 1 + file_length + 1);

  memcpy (joined, directory, directory_length);
  joined[directory_length] = '/';

  /* Copying FILE_LENGTH + 1 bytes brings FILE's terminating NUL along.  */
  memcpy (joined + directory_length + 1, file, file_length + 1);

  return joined;
}
66
/* Create DIRECTORY along with any missing parent directories, like
   'mkdir -p'.  Directories are created with mode 0700; components that
   already exist are left alone.  Any other 'mkdir' failure is reported with
   'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* "/" ends the recursion for absolute file names.  'dirname' maps both
     "" and single-component relative names to ".", whose own parent is "."
     again, so "." and "" must end the recursion as well; otherwise a
     relative DIRECTORY would recurse until the stack is exhausted.  */
  if (directory[0] == '\0'
      || strcmp (directory, "/") == 0
      || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
79
/* Recursively delete DIRECTORY and everything beneath it, like 'rm -rf'.
   A DIRECTORY that does not exist is not an error; any other failure is
   reported with 'assert_perror'.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* 'readdir' and 'closedir' must not be handed a NULL stream.  A
         missing directory simply means there is nothing to remove, which
         mirrors the ENOENT tolerance of the final 'rmdir' below.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      int err = unlink (full);
      if (err < 0)
        {
          if (errno == EISDIR)
            /* FULL is a directory: recurse (we expect a shallow directory
               structure so there's little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  /* DIRECTORY is now expected to be empty.  */
  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
115
/* Bind mount all the top-level entries in SOURCE to TARGET.  Symbolic links
   are not mounted: a link with the same contents is created in TARGET
   instead.  For every other entry, a mount point of the matching kind (a
   directory or an empty file) is created in TARGET and the entry is
   bind-mounted onto it with MS_REC and MS_RDONLY.  */
static void
bind_mount (const char *source, const char *target)
{
  DIR *stream = opendir (source);
  if (stream == NULL)
    {
      /* 'readdir' must not be handed a NULL stream.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Named so that it does not shadow the TARGET parameter.  */
          char link_target[PATH_MAX];

          /* 'readlink' does not NUL-terminate, hence the byte kept in
             reserve and the explicit terminator below.  */
          ssize_t result = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (result > 0)
            {
              link_target[result] = '\0';
              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          if (entry->d_type == DT_DIR)
            {
              int err = mkdir (new_entry, 0700);
              if (err != 0)
                assert_perror (errno);
            }
          else
            {
              /* 'open' requires a mode argument whenever O_CREAT is
                 given.  Failure is not checked here: it shows up as a
                 failed 'mount' below, which is tolerated for
                 non-directories.  */
              int fd = open (new_entry, O_WRONLY | O_CREAT, 0600);
              if (fd >= 0)
                close (fd);
            }

          int err = mount (abs_source, new_entry, "none",
                           MS_BIND | MS_REC | MS_RDONLY, NULL);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.  Failing to mount a directory, on the
             other hand, is an error.  */
          if (err != 0 && entry->d_type == DT_DIR)
            assert_perror (errno);
        }

      /* Both names are released for every kind of entry, symbolic links
         included.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
179
30da3173
LC
/* Write to /proc/PID/FILE a one-line user or group ID map that maps ID to
   itself.  FILE is the name of the map file, such as "uid_map" or
   "gid_map".  See user_namespaces(7).  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char path[100];
  char line[100];

  snprintf (path, sizeof path, "/proc/%d/%s", pid, file);

  /* A single entry: ID, ID again, and a range length of 1.  */
  int length = snprintf (line, sizeof line, "%d %d 1\n", id, id);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  if (write (fd, line, length) < 0)
    assert_perror (errno);

  close (fd);
}
202
/* Disallow setgroups(2) for PID by writing "deny" to
   /proc/PID/setgroups.  */
static void
disallow_setgroups (pid_t pid)
{
  static const char verdict[] = "deny";
  char path[100];

  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  /* 'sizeof verdict' is 5: the terminating NUL is written along with the
     four letters.  */
  if (write (fd, verdict, sizeof verdict) < 0)
    assert_perror (errno);

  close (fd);
}
221
bdb9b4e8
LC
/* Run the wrapped program in separate mount and user namespaces where STORE
   is bind-mounted at @STORE_DIRECTORY@, passing it ARGV.  ARGC is unused; it
   is there so that this function has the 'exec' signature of 'struct
   engine'.

   Once the child process has been created this function does not return:
   the parent exits with the child's exit status, or with 255 if the child
   terminated abnormally.  It returns only upon failure to set things up,
   after printing an error message to stderr.  */
static void
exec_in_user_namespace (const char *store, int argc, char *argv[])
{
  /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
     bind-mounted in the right place.  */
  int err;

  /* Both 'strdup' and 'mkdtemp' can fail; bail out instead of handing a
     NULL pointer to 'concat'.  */
  char *root_template = strdup ("/tmp/guix-exec-XXXXXX");
  char *new_root = root_template != NULL ? mkdtemp (root_template) : NULL;
  if (new_root == NULL)
    {
      fprintf (stderr, "%s: error: cannot create temporary directory: %m\n",
               argv[0]);
      free (root_template);
      return;
    }

  char *new_store = concat (new_root, "@STORE_DIRECTORY@");
  char *cwd = get_current_dir_name ();

  /* Create a child with separate namespaces and set up bind-mounts from
     there.  That way, bind-mounts automatically disappear when the child
     exits, which simplifies cleanup for the parent.  Note: clone is more
     convenient than fork + unshare since the parent can directly write
     the child uid_map/gid_map files.  */
  pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
                         NULL, NULL, NULL);
  switch (child)
    {
    case 0:
      /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
         we cannot make NEW_ROOT a tmpfs (which would have saved the need
         for 'rm_rf'.)  */
      bind_mount ("/", new_root);
      mkdir_p (new_store);
      err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
                   NULL);
      if (err < 0)
        assert_perror (errno);

      err = chdir (new_root);
      if (err < 0)
        assert_perror (errno);

      err = chroot (new_root);
      if (err < 0)
        assert_perror (errno);

      /* Change back to where we were before chroot'ing.  This is best
         effort: on failure the program starts in the new root.  */
      if (cwd != NULL)
        chdir (cwd);

      err = execv ("@WRAPPED_PROGRAM@", argv);
      if (err < 0)
        assert_perror (errno);

      /* 'execv' failed.  The child must not return into the caller, which
         would go on to try other engines in parallel with the parent.  */
      _exit (EXIT_FAILURE);

    case -1:
      /* Failure: user namespaces not supported.  */
      fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);
      rm_rf (new_root);

      /* This is the only path that returns to the caller, so release
         everything allocated above.  */
      free (cwd);
      free (new_store);
      free (new_root);
      break;

    default:
      {
        /* Map the current user/group ID in the child's namespace (the
           default is to get the "overflow UID", i.e., the UID of
           "nobody").  We must first disallow 'setgroups' for that
           process.
           NOTE(review): nothing makes the child wait for these maps to be
           written before it starts creating files under NEW_ROOT; confirm
           that this ordering is harmless.  */
        disallow_setgroups (child);
        write_id_map (child, "uid_map", getuid ());
        write_id_map (child, "gid_map", getgid ());

        /* Wait for the child, retrying if a signal interrupts the wait, so
           that STATUS is only examined when it has actually been set.  */
        int status;
        pid_t waited;
        do
          waited = waitpid (child, &status, 0);
        while (waited < 0 && errno == EINTR);

        chdir ("/");                      /* avoid EBUSY */
        rm_rf (new_root);
        free (new_root);

        if (waited == child && WIFEXITED (status))
          exit (WEXITSTATUS (status));
        else
          /* Abnormal termination cannot really be reproduced, so exit
             with 255.  */
          exit (255);
      }
    }
}
298
47a60325 299\f
99aec37a
LC
300#ifdef PROOT_PROGRAM
301
302/* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and
303 "bind-mounting" STORE in the right place. */
304static void
305exec_with_proot (const char *store, int argc, char *argv[])
306{
307 int proot_specific_argc = 4;
308 int proot_argc = argc + proot_specific_argc;
4a53c19a 309 char *proot_argv[proot_argc + 1], *proot;
99aec37a
LC
310 char bind_spec[strlen (store) + 1 + sizeof "@STORE_DIRECTORY@"];
311
312 strcpy (bind_spec, store);
313 strcat (bind_spec, ":");
314 strcat (bind_spec, "@STORE_DIRECTORY@");
315
316 proot = concat (store, PROOT_PROGRAM);
317
318 proot_argv[0] = proot;
319 proot_argv[1] = "-b";
320 proot_argv[2] = bind_spec;
321 proot_argv[3] = "@WRAPPED_PROGRAM@";
322
323 for (int i = 0; i < argc; i++)
324 proot_argv[i + proot_specific_argc] = argv[i + 1];
325
326 proot_argv[proot_argc] = NULL;
327
328 /* Seccomp support seems to invariably lead to segfaults; disable it by
329 default. */
330 setenv ("PROOT_NO_SECCOMP", "1", 0);
331
332 int err = execv (proot, proot_argv);
333 if (err < 0)
334 assert_perror (errno);
335}
336
337#endif
338
339\f
fde2aec3
LC
340/* Execution engines. */
341
/* An execution engine: one way of running the wrapped program.  */
struct engine
{
  /* Name under which the engine is looked up.  */
  const char *name;

  /* Entry point, called with the store location followed by the ARGC and
     ARGV of 'main'.  */
  void (*exec) (const char *store, int argc, char **argv);
};
347
/* Make stderr fully buffered, using a static 4096-byte buffer.  */
static void
buffer_stderr (void)
{
  /* Static storage: the buffer has to outlive this call since the stream
     keeps using it.  */
  static char storage[4096];

  setvbuf (stderr, storage, _IOFBF, sizeof storage);
}
354
/* The default engine: try user namespaces first and, when PRoot support is
   compiled in, fall back to PRoot.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* With stderr buffered, whatever 'exec_in_user_namespace' prints when it
     fails is not displayed if 'exec_with_proot' then works.  */
  buffer_stderr ();

  /* This call comes back only if the engine could not do the job.  */
  exec_in_user_namespace (store, argc, argv);

#ifdef PROOT_PROGRAM
  exec_with_proot (store, argc, argv);
#endif
}
368
369/* List of supported engines. */
370static const struct engine engines[] =
371 {
372 { "default", exec_default },
373 { "userns", exec_in_user_namespace },
374#ifdef PROOT_PROGRAM
375 { "proot", exec_with_proot },
376#endif
377 { NULL, NULL }
378 };
379
380/* Return the "execution engine" to use. */
381static const struct engine *
382execution_engine (void)
383{
384 const char *str = getenv ("GUIX_EXECUTION_ENGINE");
385
386 if (str == NULL)
387 str = "default";
388
389 try:
390 for (const struct engine *engine = engines;
391 engine->name != NULL;
392 engine++)
393 {
394 if (strcmp (engine->name, str) == 0)
395 return engine;
396 }
397
398 fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n",
399 str);
400 str = "default";
401 goto try;
402}
403
404\f
47a60325
LC
405int
406main (int argc, char *argv[])
407{
408 ssize_t size;
409 char self[PATH_MAX];
410 size = readlink ("/proc/self/exe", self, sizeof self - 1);
411 assert (size > 0);
412
413 /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
414 want to extract "/home/ludo/.local/gnu/store". */
415 size_t index = strlen (self)
416 - strlen ("@WRAPPED_PROGRAM@")
417 + strlen ("@STORE_DIRECTORY@");
418 char *store = strdup (self);
419 store[index] = '\0';
420
421 struct stat statbuf;
422
423 /* If STORE is already at the "right" place, we can execute
424 @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's
425 needed when running one of these wrappers from within an unshare'd
426 namespace, because 'unshare' fails with EPERM in that context. */
427 if (strcmp (store, "@STORE_DIRECTORY@") != 0
428 && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0)
429 {
fde2aec3
LC
430 const struct engine *engine = execution_engine ();
431 engine->exec (store, argc, argv);
47a60325 432
fde2aec3
LC
433 /* If we reach this point, that's because ENGINE failed to do the
434 job. */
bdb9b4e8 435 fprintf (stderr, "\
30da3173
LC
436This may be because \"user namespaces\" are not supported on this system.\n\
437Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
438unless you move it to the '@STORE_DIRECTORY@' directory.\n\
439\n\
440Please refer to the 'guix pack' documentation for more information.\n");
bdb9b4e8 441 return EXIT_FAILURE;
47a60325
LC
442 }
443
444 /* The executable is available under @STORE_DIRECTORY@, so we can now
445 execute it. */
446 int err = execv ("@WRAPPED_PROGRAM@", argv);
447 if (err < 0)
448 assert_perror (errno);
449
450 return EXIT_FAILURE;
451}