gnu: Add cxxopts.
[jackhill/guix/guix.git] / gnu / packages / aux-files / run-in-namespace.c
CommitLineData
47a60325 1/* GNU Guix --- Functional package management for GNU
2520059b 2 Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
47a60325
LC
3
4 This file is part of GNU Guix.
5
6 GNU Guix is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or (at
9 your option) any later version.
10
11 GNU Guix is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */
18
19/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
20 mount namespace where the store is mounted in its right place.
21
22 We would happily do that in Scheme using 'call-with-container'. However,
23 this very program needs to be relocatable, so it needs to be statically
24 linked, which complicates things (Guile's modules can hardly be "linked"
25 into a single executable.) */
26
27#define _GNU_SOURCE
28#include <stdlib.h>
29#include <stdio.h>
30#include <unistd.h>
31#include <sched.h>
32#include <sys/mount.h>
33#include <errno.h>
34#include <libgen.h>
35#include <limits.h>
36#include <string.h>
37#include <assert.h>
38#include <sys/stat.h>
39#include <sys/types.h>
40#include <sys/wait.h>
41#include <fcntl.h>
42#include <dirent.h>
30da3173 43#include <sys/syscall.h>
47a60325 44
64562321
LC
/* Whether we're building the ld.so/libfakechroot wrapper: 1 when
   PROGRAM_INTERPRETER, LOADER_AUDIT_MODULE, and FAKECHROOT_LIBRARY are all
   defined, 0 otherwise.  The macro expands to a plain integer rather than to
   an expression containing 'defined': generating the 'defined' operator
   through macro expansion in an #if directive is undefined behavior in
   ISO C.  */
#if (defined PROGRAM_INTERPRETER) && (defined LOADER_AUDIT_MODULE) \
  && (defined FAKECHROOT_LIBRARY)
# define HAVE_EXEC_WITH_LOADER 1
#else
# define HAVE_EXEC_WITH_LOADER 0
#endif

/* The original store, "/gnu/store" by default.  This is an array rather than
   a pointer so that 'sizeof original_store' yields its length plus one (the
   terminating NUL).  */
static const char original_store[] = "@STORE_DIRECTORY@";
52
53
14928af2
LC
/* Like 'malloc', but never return NULL: print a diagnostic and abort when
   SIZE bytes cannot be allocated.  The check is explicit rather than an
   'assert' so that it remains in effect when compiling with NDEBUG.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);

  if (result == NULL)
    {
      fprintf (stderr, "error: failed to allocate %zu bytes\n", size);
      abort ();
    }

  return result;
}
62
47a60325
LC
/* Return a newly-allocated string made of DIRECTORY, a slash, and FILE.  The
   caller owns the result and must eventually free it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_len = strlen (directory);
  size_t file_len = strlen (file);

  /* Two extra bytes: the separating slash and the terminating NUL.  */
  char *joined = xmalloc (directory_len + 2 + file_len);

  memcpy (joined, directory, directory_len);
  joined[directory_len] = '/';

  /* Copy FILE along with its terminating NUL.  */
  memcpy (joined + directory_len + 1, file, file_len + 1);

  return joined;
}
75
/* Create DIRECTORY and any missing ancestors, like 'mkdir -p'.  Directories
   are created with mode 0700; a directory that already exists is not an
   error.  Abort on any other failure.  */
static void
mkdir_p (const char *directory)
{
  /* "/" ends the recursion for absolute file names and "." ends it for
     relative ones: once it runs out of components, 'dirname' keeps
     returning ".", which would otherwise recurse forever.  */
  if (strcmp (directory, "/") == 0 || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
88
/* Recursively delete DIRECTORY and everything below it, like 'rm -rf'.  A
   DIRECTORY that does not exist is silently ignored; abort on any other
   failure.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* Nothing to delete if DIRECTORY is already gone; anything else is
         unexpected.  Without this check, 'readdir' would be passed NULL.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      /* Try to remove the entry as a non-directory first; EISDIR tells us
         it is a directory.  */
      if (unlink (full) < 0)
        {
          if (errno == EISDIR)
            /* Recurse (we expect a shallow directory structure so there's
               little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  if (rmdir (directory) < 0 && errno != ENOENT)
    assert_perror (errno);
}
124
64562321
LC
/* Make TARGET a bind-mount of SOURCE.  Take into account ENTRY's type, which
   corresponds to SOURCE: the mount point is created as a directory when
   ENTRY is a directory, and as an empty file otherwise.  Return 0 on
   success and non-zero on failure, in which case 'errno' is set.  */
static int
bind_mount (const char *source, const struct dirent *entry,
            const char *target)
{
  if (entry->d_type == DT_DIR)
    {
      int err = mkdir (target, 0700);
      if (err != 0)
        return err;
    }
  else
    {
      /* Create an empty file to serve as the mount point.  'open' requires
         a mode argument whenever O_CREAT is given.  Failing to create the
         file is not handled here: 'mount' below then fails and its result
         is returned.  */
      int fd = open (target, O_WRONLY | O_CREAT, 0600);
      if (fd >= 0)
        close (fd);
    }

  return mount (source, target, "none",
                MS_BIND | MS_REC | MS_RDONLY, NULL);
}
143
144#if HAVE_EXEC_WITH_LOADER
145
/* Create TARGET as a symbolic link pointing to SOURCE and return the result
   of 'symlink'.  ENTRY is unused: it is only there so that this function has
   the same signature as 'bind_mount'.  */
static int
make_symlink (const char *source, const struct dirent *entry,
              const char *target)
{
  (void) entry;

  int status = symlink (source, target);
  return status;
}
153
154#endif
155
/* Mirror with FIRMLINK all the top-level entries in SOURCE to TARGET.
   Symbolic links are re-created under TARGET with the same contents; for
   every other entry, FIRMLINK is called with the entry's file name under
   SOURCE, its 'struct dirent', and the file name to create under TARGET.  */
static void
mirror_directory (const char *source, const char *target,
                  int (* firmlink) (const char *, const struct dirent *,
                                    const char *))
{
  DIR *stream = opendir (source);
  if (stream == NULL)
    {
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Copy the link itself rather than what it points to.  A link
             that cannot be read is silently skipped.  */
          char link_target[PATH_MAX];

          ssize_t result = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (result > 0)
            {
              /* 'readlink' does not NUL-terminate its result.  */
              link_target[result] = '\0';
              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          int err = firmlink (abs_source, entry, new_entry);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.

             NOTE(review): the condition below does the opposite of what the
             paragraph above says: it aborts on failure for non-directory
             entries and tolerates failure for directories.  It is kept
             unchanged here; confirm which behavior is intended.  */
          if (err != 0 && entry->d_type != DT_DIR)
            assert_perror (errno);
        }

      /* Release both names whichever branch was taken.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
211
30da3173
LC
/* Write to /proc/PID/FILE a one-line user or group ID map that maps ID to
   itself.  See user_namespaces(7).  Abort if the map file cannot be opened
   or written.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char map_file_name[100];
  char mapping[100];

  snprintf (map_file_name, sizeof map_file_name, "/proc/%d/%s", pid, file);

  /* A single range of length 1: ID is mapped to ID.  */
  int mapping_len = snprintf (mapping, sizeof mapping, "%d %d 1\n", id, id);

  int fd = open (map_file_name, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  if (write (fd, mapping, mapping_len) < 0)
    assert_perror (errno);

  close (fd);
}
234
/* Disallow setgroups(2) for PID by writing "deny" to /proc/PID/setgroups.
   Abort if that file cannot be opened or written.  */
static void
disallow_setgroups (pid_t pid)
{
  char setgroups_file[100];
  snprintf (setgroups_file, sizeof setgroups_file,
            "/proc/%d/setgroups", pid);

  int fd = open (setgroups_file, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  /* Five bytes: "deny" plus its terminating NUL.  */
  if (write (fd, "deny", 5) < 0)
    assert_perror (errno);

  close (fd);
}
253
bdb9b4e8
LC
254/* Run the wrapper program in a separate mount user namespace. Return only
255 upon failure. */
256static void
257exec_in_user_namespace (const char *store, int argc, char *argv[])
258{
259 /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
260 bind-mounted in the right place. */
261 int err;
262 char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
0cd13e72 263 char *new_store = concat (new_root, original_store);
bdb9b4e8
LC
264 char *cwd = get_current_dir_name ();
265
266 /* Create a child with separate namespaces and set up bind-mounts from
267 there. That way, bind-mounts automatically disappear when the child
268 exits, which simplifies cleanup for the parent. Note: clone is more
269 convenient than fork + unshare since the parent can directly write
270 the child uid_map/gid_map files. */
271 pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
272 NULL, NULL, NULL);
273 switch (child)
274 {
275 case 0:
276 /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
277 we cannot make NEW_ROOT a tmpfs (which would have saved the need
278 for 'rm_rf'.) */
64562321 279 mirror_directory ("/", new_root, bind_mount);
bdb9b4e8
LC
280 mkdir_p (new_store);
281 err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
282 NULL);
283 if (err < 0)
284 assert_perror (errno);
285
286 chdir (new_root);
287 err = chroot (new_root);
288 if (err < 0)
289 assert_perror (errno);
290
291 /* Change back to where we were before chroot'ing. */
292 chdir (cwd);
293
294 int err = execv ("@WRAPPED_PROGRAM@", argv);
295 if (err < 0)
296 assert_perror (errno);
297 break;
298
299 case -1:
300 /* Failure: user namespaces not supported. */
301 fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);
302 rm_rf (new_root);
303 break;
304
305 default:
306 {
307 /* Map the current user/group ID in the child's namespace (the
308 default is to get the "overflow UID", i.e., the UID of
309 "nobody"). We must first disallow 'setgroups' for that
310 process. */
311 disallow_setgroups (child);
312 write_id_map (child, "uid_map", getuid ());
313 write_id_map (child, "gid_map", getgid ());
314
315 int status;
316 waitpid (child, &status, 0);
317 chdir ("/"); /* avoid EBUSY */
318 rm_rf (new_root);
319 free (new_root);
320
321 if (WIFEXITED (status))
322 exit (WEXITSTATUS (status));
323 else
324 /* Abnormal termination cannot really be reproduced, so exit
325 with 255. */
326 exit (255);
327 }
328 }
329}
330
47a60325 331\f
99aec37a
LC
332#ifdef PROOT_PROGRAM
333
334/* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and
335 "bind-mounting" STORE in the right place. */
336static void
337exec_with_proot (const char *store, int argc, char *argv[])
338{
339 int proot_specific_argc = 4;
340 int proot_argc = argc + proot_specific_argc;
4a53c19a 341 char *proot_argv[proot_argc + 1], *proot;
0cd13e72 342 char bind_spec[strlen (store) + 1 + sizeof original_store];
99aec37a
LC
343
344 strcpy (bind_spec, store);
345 strcat (bind_spec, ":");
0cd13e72 346 strcat (bind_spec, original_store);
99aec37a
LC
347
348 proot = concat (store, PROOT_PROGRAM);
349
350 proot_argv[0] = proot;
351 proot_argv[1] = "-b";
352 proot_argv[2] = bind_spec;
353 proot_argv[3] = "@WRAPPED_PROGRAM@";
354
355 for (int i = 0; i < argc; i++)
356 proot_argv[i + proot_specific_argc] = argv[i + 1];
357
358 proot_argv[proot_argc] = NULL;
359
360 /* Seccomp support seems to invariably lead to segfaults; disable it by
361 default. */
362 setenv ("PROOT_NO_SECCOMP", "1", 0);
363
364 int err = execv (proot, proot_argv);
365 if (err < 0)
366 assert_perror (errno);
367}
368
369#endif
370
371\f
64562321
LC
372#if HAVE_EXEC_WITH_LOADER
373
c6c0d5a2
LC
374/* Traverse PATH, a NULL-terminated string array, and return a colon-separated
375 search path where each item of PATH has been relocated to STORE. The
376 result is malloc'd. */
377static char *
378relocated_search_path (const char *path[], const char *store)
379{
380 char *new_path;
381 size_t size = 0;
382
383 for (size_t i = 0; path[i] != NULL; i++)
384 size += strlen (store) + strlen (path[i]) + 1; /* upper bound */
385
386 new_path = xmalloc (size + 1);
387 new_path[0] = '\0';
388
389 for (size_t i = 0; path[i] != NULL; i++)
390 {
391 if (strncmp (path[i], original_store,
392 sizeof original_store - 1) == 0)
393 {
394 strcat (new_path, store);
395 strcat (new_path, path[i] + sizeof original_store - 1);
396 }
397 else
398 strcat (new_path, path[i]); /* possibly $ORIGIN */
399
400 strcat (new_path, ":");
401 }
402
403 new_path[strlen (new_path) - 1] = '\0'; /* Remove trailing colon. */
404
405 return new_path;
406}
407
28dce8f0
LC
/* Return PATH1 and PATH2 joined by a colon.  When PATH1 is empty, PATH2
   itself is returned; otherwise the result is malloc'd.  */
static char *
concat_paths (const char *path1, const char *path2)
{
  /* Nothing to prepend: hand back PATH2 as is.  */
  if (*path1 == '\0')
    return (char *) path2;

  size_t len1 = strlen (path1);
  size_t len2 = strlen (path2);

  /* Two extra bytes: the colon and the terminating NUL.  */
  char *joined = xmalloc (len1 + len2 + 2);

  memcpy (joined, path1, len1);
  joined[len1] = ':';
  memcpy (joined + len1 + 1, path2, len2 + 1);

  return joined;
}
424
64562321
LC
425/* Execute the wrapped program by invoking the loader (ld.so) directly,
426 passing it the audit module and preloading libfakechroot.so. */
427static void
428exec_with_loader (const char *store, int argc, char *argv[])
429{
c6c0d5a2 430 static const char *audit_library_path[] = LOADER_AUDIT_RUNPATH;
64562321
LC
431 char *loader = concat (store,
432 PROGRAM_INTERPRETER + sizeof original_store);
c6c0d5a2 433 size_t loader_specific_argc = 8;
64562321
LC
434 size_t loader_argc = argc + loader_specific_argc;
435 char *loader_argv[loader_argc + 1];
436 loader_argv[0] = argv[0];
437 loader_argv[1] = "--audit";
438 loader_argv[2] = concat (store,
439 LOADER_AUDIT_MODULE + sizeof original_store);
c6c0d5a2 440
28dce8f0
LC
441 /* The audit module depends on libc.so and libgcc_s.so so honor
442 AUDIT_LIBRARY_PATH. Additionally, honor $LD_LIBRARY_PATH if set. */
c6c0d5a2 443 loader_argv[3] = "--library-path";
28dce8f0
LC
444 loader_argv[4] =
445 concat_paths (getenv ("LD_LIBRARY_PATH") ?: "",
446 relocated_search_path (audit_library_path, store));
c6c0d5a2
LC
447
448 loader_argv[5] = "--preload";
449 loader_argv[6] = concat (store,
64562321 450 FAKECHROOT_LIBRARY + sizeof original_store);
c6c0d5a2 451 loader_argv[7] = concat (store,
64562321
LC
452 "@WRAPPED_PROGRAM@" + sizeof original_store);
453
454 for (size_t i = 0; i < argc; i++)
455 loader_argv[i + loader_specific_argc] = argv[i + 1];
456
457 loader_argv[loader_argc] = NULL;
458
459 /* Set up the root directory. */
460 int err;
461 char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
462 mirror_directory ("/", new_root, make_symlink);
463
c088aa29
LC
464 /* 'mirror_directory' created a symlink for the ancestor of ORIGINAL_STORE,
465 typically "/gnu". Remove that entry so we can create NEW_STORE
466 below. */
467 const char *slash = strchr (original_store + 1, '/');
468 const char *top = slash != NULL
469 ? strndupa (original_store, slash - original_store)
470 : original_store;
471 char *new_store_top = concat (new_root, top);
472 unlink (new_store_top);
473
474 /* Now create the store under NEW_ROOT. */
64562321
LC
475 char *new_store = concat (new_root, original_store);
476 char *new_store_parent = dirname (strdup (new_store));
477 mkdir_p (new_store_parent);
c088aa29
LC
478 err = symlink (store, new_store);
479 if (err < 0)
480 assert_perror (errno);
64562321
LC
481
482#ifdef GCONV_DIRECTORY
483 /* Tell libc where to find its gconv modules. This is necessary because
484 gconv uses non-interposable 'open' calls. */
485 char *gconv_path = concat (store,
486 GCONV_DIRECTORY + sizeof original_store);
487 setenv ("GCONV_PATH", gconv_path, 1);
488 free (gconv_path);
489#endif
490
491 setenv ("FAKECHROOT_BASE", new_root, 1);
492
493 pid_t child = fork ();
494 switch (child)
495 {
496 case 0:
497 err = execv (loader, loader_argv);
498 if (err < 0)
499 assert_perror (errno);
500 exit (EXIT_FAILURE);
501 break;
502
503 case -1:
504 assert_perror (errno);
505 exit (EXIT_FAILURE);
506 break;
507
508 default:
509 {
510 int status;
511 waitpid (child, &status, 0);
512 chdir ("/"); /* avoid EBUSY */
513 rm_rf (new_root);
514 free (new_root);
515
516 close (2); /* flushing stderr should be silent */
517
518 if (WIFEXITED (status))
519 exit (WEXITSTATUS (status));
520 else
521 /* Abnormal termination cannot really be reproduced, so exit
522 with 255. */
523 exit (255);
524 }
525 }
526}
527
528#endif
529
530\f
fde2aec3
LC
531/* Execution engines. */
532
/* An execution engine: the NAME it is selected by, and the EXEC function
   that implements it.  EXEC is called with the store directory, ARGC, and
   ARGV.  */
struct engine
{
  const char *name;
  void (* exec) (const char *store, int argc, char **argv);
};
538
/* Make stderr fully buffered, using a statically-allocated buffer of 4096
   bytes.  */
static void
buffer_stderr (void)
{
  enum { STDERR_BUFFER_SIZE = 4096 };
  static char stderr_buffer[STDERR_BUFFER_SIZE];

  setvbuf (stderr, stderr_buffer, _IOFBF, STDERR_BUFFER_SIZE);
}
545
/* The default engine: choose a robust method.  Try user namespaces first
   and, when PROOT_PROGRAM is defined, fall back to PRoot.  Return only if
   every method that was tried failed.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace'
     fails but 'exec_with_proot' works.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);

#ifdef PROOT_PROGRAM
  /* Reached only when 'exec_in_user_namespace' returned, i.e., failed.  */
  exec_with_proot (store, argc, argv);
#endif
}
559
64562321
LC
/* The "performance" engine: choose performance over robustness.  Try user
   namespaces first and, when the loader wrapper is available, fall back to
   ld.so with libfakechroot.  */
static void
exec_performance (const char *store, int argc, char *argv[])
{
  /* Buffer stderr before trying the engines below.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);

#if HAVE_EXEC_WITH_LOADER
  /* Reached only when 'exec_in_user_namespace' returned, i.e., failed.  */
  exec_with_loader (store, argc, argv);
#endif
}
571
fde2aec3
LC
572/* List of supported engines. */
573static const struct engine engines[] =
574 {
575 { "default", exec_default },
64562321 576 { "performance", exec_performance },
fde2aec3
LC
577 { "userns", exec_in_user_namespace },
578#ifdef PROOT_PROGRAM
579 { "proot", exec_with_proot },
64562321
LC
580#endif
581#if HAVE_EXEC_WITH_LOADER
582 { "fakechroot", exec_with_loader },
fde2aec3
LC
583#endif
584 { NULL, NULL }
585 };
586
587/* Return the "execution engine" to use. */
588static const struct engine *
589execution_engine (void)
590{
591 const char *str = getenv ("GUIX_EXECUTION_ENGINE");
592
593 if (str == NULL)
594 str = "default";
595
596 try:
597 for (const struct engine *engine = engines;
598 engine->name != NULL;
599 engine++)
600 {
601 if (strcmp (engine->name, str) == 0)
602 return engine;
603 }
604
605 fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n",
606 str);
607 str = "default";
608 goto try;
609}
610
611\f
47a60325
LC
612int
613main (int argc, char *argv[])
614{
615 ssize_t size;
616 char self[PATH_MAX];
617 size = readlink ("/proc/self/exe", self, sizeof self - 1);
618 assert (size > 0);
619
620 /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
621 want to extract "/home/ludo/.local/gnu/store". */
622 size_t index = strlen (self)
0cd13e72 623 - strlen ("@WRAPPED_PROGRAM@") + strlen (original_store);
47a60325
LC
624 char *store = strdup (self);
625 store[index] = '\0';
626
627 struct stat statbuf;
628
629 /* If STORE is already at the "right" place, we can execute
630 @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's
631 needed when running one of these wrappers from within an unshare'd
632 namespace, because 'unshare' fails with EPERM in that context. */
0cd13e72 633 if (strcmp (store, original_store) != 0
47a60325
LC
634 && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0)
635 {
fde2aec3
LC
636 const struct engine *engine = execution_engine ();
637 engine->exec (store, argc, argv);
47a60325 638
fde2aec3
LC
639 /* If we reach this point, that's because ENGINE failed to do the
640 job. */
bdb9b4e8 641 fprintf (stderr, "\
30da3173
LC
642This may be because \"user namespaces\" are not supported on this system.\n\
643Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
644unless you move it to the '@STORE_DIRECTORY@' directory.\n\
645\n\
646Please refer to the 'guix pack' documentation for more information.\n");
bdb9b4e8 647 return EXIT_FAILURE;
47a60325
LC
648 }
649
650 /* The executable is available under @STORE_DIRECTORY@, so we can now
651 execute it. */
652 int err = execv ("@WRAPPED_PROGRAM@", argv);
653 if (err < 0)
654 assert_perror (errno);
655
656 return EXIT_FAILURE;
657}