1 /* GNU Guix --- Functional package management for GNU
2 Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
4 This file is part of GNU Guix.
6 GNU Guix is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or (at
9 your option) any later version.
11 GNU Guix is distributed in the hope that it will be useful, but
12 WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */
19 /* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
20 mount namespace where the store is mounted in its right place.
22 We would happily do that in Scheme using 'call-with-container'. However,
23 this very program needs to be relocatable, so it needs to be statically
24 linked, which complicates things (Guile's modules can hardly be "linked"
25 into a single executable.) */
32 #include <sys/mount.h>
39 #include <sys/types.h>
43 #include <sys/syscall.h>
44 #include <sys/prctl.h>
/* Whether we're building the ld.so/libfakechroot wrapper.  This expands to 1
   when PROGRAM_INTERPRETER, LOADER_AUDIT_MODULE, and FAKECHROOT_LIBRARY are
   all defined, and to 0 otherwise, so it is meant to be tested with '#if'.

   The 'defined' operators are evaluated here, once, rather than being part
   of the macro's replacement list: a 'defined' token that results from macro
   expansion within an '#if' expression has undefined behavior.  */
#if (defined PROGRAM_INTERPRETER) && (defined LOADER_AUDIT_MODULE) \
  && (defined FAKECHROOT_LIBRARY)
# define HAVE_EXEC_WITH_LOADER 1
#else
# define HAVE_EXEC_WITH_LOADER 0
#endif
/* File name of the store the wrapped program expects, "/gnu/store" by
   default.  The value below is a placeholder; presumably it is substituted
   when this file is instantiated (TODO confirm against the build recipe).  */
static const char original_store[] = "@STORE_DIRECTORY@";
/* Like 'malloc', but abort if the allocation fails.  Return a non-NULL
   pointer to at least SIZE bytes that the caller must eventually free.  */
static void *
xmalloc (size_t size)
{
  /* 'malloc (0)' may legitimately return NULL; always request at least one
     byte so that NULL unambiguously means failure.  */
  void *result = malloc (size != 0 ? size : 1);

  /* Check explicitly rather than with 'assert', which is compiled out when
     NDEBUG is defined.  */
  if (result == NULL)
    abort ();

  return result;
}
/* Return a newly allocated string made of DIRECTORY, a slash, and FILE.  The
   caller owns the result and must eventually free it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* Two extra bytes: the separating slash and the terminating NUL.  */
  char *joined = xmalloc (directory_length + 2 + file_length);

  memcpy (joined, directory, directory_length);
  joined[directory_length] = '/';

  /* Copy FILE together with its terminating NUL.  */
  memcpy (joined + directory_length + 1, file, file_length + 1);

  return joined;
}
/* Create DIRECTORY along with any missing ancestor, like 'mkdir -p'.
   Directories are created with mode 0700; directories that already exist are
   left alone.  Any other 'mkdir' failure is reported via 'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* Stop at the root, but also at "." and at the empty string: 'dirname'
     maps both of the latter to ".", so recursing on them would never
     terminate.  */
  if (directory[0] == '\0'
      || strcmp (directory, "/") == 0
      || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
/* Recursively delete DIRECTORY and everything below it.  It is not an error
   for DIRECTORY to be missing; any other failure is reported via
   'assert_perror'.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* Passing a null stream to 'readdir' would crash.  A directory that is
         already gone needs no work, consistent with how 'rmdir' failures are
         treated below.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      int err = unlink (full);
      if (err < 0)
        {
          if (errno == EISDIR)
            /* Recurse (we expect a shallow directory structure so there's
               little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
126 /* Make TARGET a bind-mount of SOURCE. Take into account ENTRY's type, which
127 corresponds to SOURCE. */
129 bind_mount (const char *source
, const struct dirent
*entry
,
132 if (entry
->d_type
== DT_DIR
)
134 int err
= mkdir (target
, 0700);
139 close (open (target
, O_WRONLY
| O_CREAT
));
141 return mount (source
, target
, "none",
142 MS_BIND
| MS_REC
| MS_RDONLY
, NULL
);
#if HAVE_EXEC_WITH_LOADER

/* Create TARGET as a symbolic link pointing to SOURCE and return the result
   of 'symlink'.  ENTRY is not used; the parameter exists so that this
   function has the 'firmlink' signature expected by 'mirror_directory'.  */
static int
make_symlink (const char *source, const struct dirent *entry,
              const char *target)
{
  int result = symlink (source, target);
  return result;
}

#endif
157 /* Mirror with FIRMLINK all the top-level entries in SOURCE to TARGET. */
159 mirror_directory (const char *source
, const char *target
,
160 int (* firmlink
) (const char *, const struct dirent
*,
163 DIR *stream
= opendir (source
);
165 for (struct dirent
*entry
= readdir (stream
);
167 entry
= readdir (stream
))
169 /* XXX: Some file systems may not report a useful 'd_type'. Ignore them
171 assert (entry
->d_type
!= DT_UNKNOWN
);
173 if (strcmp (entry
->d_name
, ".") == 0
174 || strcmp (entry
->d_name
, "..") == 0)
177 char *abs_source
= concat (source
, entry
->d_name
);
178 char *new_entry
= concat (target
, entry
->d_name
);
180 if (entry
->d_type
== DT_LNK
)
182 char target
[PATH_MAX
];
184 ssize_t result
= readlink (abs_source
, target
, sizeof target
- 1);
187 target
[result
] = '\0';
188 int err
= symlink (target
, new_entry
);
190 assert_perror (errno
);
195 /* Create the mount point. */
196 int err
= firmlink (abs_source
, entry
, new_entry
);
198 /* It used to be that only directories could be bind-mounted. Thus,
199 keep going if we fail to bind-mount a non-directory entry.
200 That's OK because regular files in the root file system are
201 usually uninteresting. */
202 if (err
!= 0 && entry
->d_type
!= DT_DIR
)
203 assert_perror (errno
);
/* Write the user/group ID map for PID to FILE, mapping ID to itself.  FILE is
   the name of the map file under /proc/PID, "uid_map" or "gid_map".  See
   user_namespaces(7).  Failure to open or write the file is reported via
   'assert_perror'.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char id_map_file[100];
  snprintf (id_map_file, sizeof id_map_file, "/proc/%d/%s", (int) pid, file);

  char id_map[100];

  /* Map ID to itself, for a range of length 1.  User and group IDs are
     unsigned, so print ID as such: an ID beyond INT_MAX would otherwise show
     up as a negative number, which is not a valid map entry.  */
  int len = snprintf (id_map, sizeof id_map, "%u %u 1\n",
                      (unsigned int) id, (unsigned int) id);

  int fd = open (id_map_file, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  ssize_t n = write (fd, id_map, len);
  if (n < 0)
    assert_perror (errno);

  close (fd);
}
/* Disallow setgroups(2) for PID by writing "deny" to /proc/PID/setgroups.
   Failure to open or write that file is reported via 'assert_perror'.  */
static void
disallow_setgroups (pid_t pid)
{
  /* The terminating NUL is written along with the word, hence the use of
     'sizeof' (5 bytes) rather than 'strlen'.  */
  static const char verdict[] = "deny";
  char file[100];

  snprintf (file, sizeof file, "/proc/%d/setgroups", pid);

  int fd = open (file, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  int err = write (fd, verdict, sizeof verdict);
  if (err < 0)
    assert_perror (errno);

  close (fd);
}
255 /* Run the wrapper program in a separate mount user namespace. Return only
258 exec_in_user_namespace (const char *store
, int argc
, char *argv
[])
260 /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
261 bind-mounted in the right place. */
263 char *new_root
= mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
264 char *new_store
= concat (new_root
, original_store
);
265 char *cwd
= get_current_dir_name ();
267 /* Become the new parent of grand-children when their parent dies. */
268 prctl (PR_SET_CHILD_SUBREAPER
, 1);
270 /* Optionally, make NEW_ROOT a tmpfs. That way, if we have to leave it
271 behind because there are sub-processes still running when this wrapper
273 err
= mount ("none", new_root
, "tmpfs", 0, NULL
);
274 is_tmpfs
= (err
== 0);
276 /* Create a child with separate namespaces and set up bind-mounts from
277 there. That way, bind-mounts automatically disappear when the child
278 exits, which simplifies cleanup for the parent. Note: clone is more
279 convenient than fork + unshare since the parent can directly write
280 the child uid_map/gid_map files. */
281 pid_t child
= syscall (SYS_clone
, SIGCHLD
| CLONE_NEWNS
| CLONE_NEWUSER
,
286 /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
287 we cannot make NEW_ROOT a tmpfs (which would have saved the need
289 mirror_directory ("/", new_root
, bind_mount
);
291 err
= mount (store
, new_store
, "none", MS_BIND
| MS_REC
| MS_RDONLY
,
294 assert_perror (errno
);
297 err
= chroot (new_root
);
299 assert_perror (errno
);
301 /* Change back to where we were before chroot'ing. */
304 int err
= execv ("@WRAPPED_PROGRAM@", argv
);
306 assert_perror (errno
);
310 /* Failure: user namespaces not supported. */
311 fprintf (stderr
, "%s: error: 'clone' failed: %m\n", argv
[0]);
318 /* Map the current user/group ID in the child's namespace (the
319 default is to get the "overflow UID", i.e., the UID of
320 "nobody"). We must first disallow 'setgroups' for that
322 disallow_setgroups (child
);
323 write_id_map (child
, "uid_map", getuid ());
324 write_id_map (child
, "gid_map", getgid ());
326 int status
, status_other
;
327 waitpid (child
, &status
, 0);
329 chdir ("/"); /* avoid EBUSY */
332 /* NEW_ROOT lives on in child processes and we no longer need it
333 to exist as an empty directory in the global namespace. */
337 /* Check whether there are child processes left. If there are none,
338 we can remove NEW_ROOT just fine. Conversely, if there are
339 processes left (for example because this wrapper's child forked),
340 we have to leave NEW_ROOT behind so that those processes can still
341 access their root file system (XXX). */
342 else if (waitpid (-1 , &status_other
, WNOHANG
) == -1)
347 if (WIFEXITED (status
))
348 exit (WEXITSTATUS (status
));
350 /* Abnormal termination cannot really be reproduced, so exit
#ifdef PROOT_PROGRAM

/* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and
   "bind-mounting" STORE in the right place.  PRoot is looked up at
   PROOT_PROGRAM, relative to STORE.  Return only if 'execv' failed.

   The definition is guarded because PROOT_PROGRAM is a build-time macro;
   NOTE(review): callers are assumed to be guarded the same way.  */
static void
exec_with_proot (const char *store, int argc, char *argv[])
{
  /* PROOT -b BIND_SPEC @WRAPPED_PROGRAM@, followed by ARGV[1] and on.  */
  int proot_specific_argc = 4;
  int proot_argc = argc + proot_specific_argc;
  char *proot_argv[proot_argc + 1], *proot;

  /* STORE, a colon, and ORIGINAL_STORE; 'sizeof' accounts for the NUL.  */
  char bind_spec[strlen (store) + 1 + sizeof original_store];

  strcpy (bind_spec, store);
  strcat (bind_spec, ":");
  strcat (bind_spec, original_store);

  proot = concat (store, PROOT_PROGRAM);

  proot_argv[0] = proot;
  proot_argv[1] = "-b";
  proot_argv[2] = bind_spec;
  proot_argv[3] = "@WRAPPED_PROGRAM@";

  /* Copy ARGV[1] up to, but excluding, the terminating ARGV[ARGC].  */
  for (int i = 1; i < argc; i++)
    proot_argv[i + proot_specific_argc - 1] = argv[i];

  /* Terminate the vector right after the last copied argument, and keep the
     spare slot at the end cleared as well.  */
  proot_argv[proot_argc - 1] = NULL;
  proot_argv[proot_argc] = NULL;

  /* Seccomp support seems to invariably lead to segfaults; disable it by
     default.  */
  setenv ("PROOT_NO_SECCOMP", "1", 0);

  int err = execv (proot, proot_argv);
  if (err < 0)
    assert_perror (errno);
}

#endif
#if HAVE_EXEC_WITH_LOADER

/* Traverse PATH, a NULL-terminated string array, and return a colon-separated
   search path where each item of PATH has been relocated to STORE: items
   that start with ORIGINAL_STORE get that prefix replaced by STORE, other
   items are copied unchanged.  The result is malloc'd; it is the empty
   string when PATH has no items.  */
static char *
relocated_search_path (const char *path[], const char *store)
{
  char *new_path;
  size_t size = 0;

  for (size_t i = 0; path[i] != NULL; i++)
    size += strlen (store) + strlen (path[i]) + 1;  /* upper bound */

  new_path = xmalloc (size + 1);
  new_path[0] = '\0';

  for (size_t i = 0; path[i] != NULL; i++)
    {
      if (strncmp (path[i], original_store,
                   sizeof original_store - 1) == 0)
        {
          strcat (new_path, store);
          strcat (new_path, path[i] + sizeof original_store - 1);
        }
      else
        strcat (new_path, path[i]);               /* possibly $ORIGIN */

      strcat (new_path, ":");
    }

  /* Remove the trailing colon, if any.  When PATH is empty there is none,
     and writing at index -1 would corrupt memory.  */
  size_t length = strlen (new_path);
  if (length > 0)
    new_path[length - 1] = '\0';

  return new_path;
}

#endif
#if HAVE_EXEC_WITH_LOADER

/* Concatenate PATH1 and PATH2 with a colon in between.  When either one is
   empty, return the other one as is, so that the result never contains an
   empty component.  The result is potentially malloc'd: it is a fresh
   allocation only when both PATH1 and PATH2 are non-empty.  */
static char *
concat_paths (const char *path1, const char *path2)
{
  if (path1[0] == '\0')
    return (char *) path2;

  /* A trailing colon would add an empty component to the search path; the
     loader is understood to treat such a component as the current
     directory, which is not what callers mean.  */
  if (path2[0] == '\0')
    return (char *) path1;

  char *result = xmalloc (strlen (path1) + strlen (path2) + 2);
  strcpy (result, path1);
  strcat (result, ":");
  strcat (result, path2);
  return result;
}

#endif
#if HAVE_EXEC_WITH_LOADER

/* Execute the wrapped program by invoking the loader (ld.so) directly,
   passing it the audit module and preloading libfakechroot.so.  STORE is
   where the store actually is; ARGC and ARGV are those of this wrapper.
   Return only if the temporary root directory could not be created;
   otherwise this process exits with the wrapped program's exit status (or
   255 if it terminated abnormally).  */
static void
exec_with_loader (const char *store, int argc, char *argv[])
{
  static const char *audit_library_path[] = LOADER_AUDIT_RUNPATH;

  /* PROGRAM_INTERPRETER and friends are file names under ORIGINAL_STORE;
     skipping 'sizeof original_store' bytes (the store name plus the slash
     that follows it) yields the part relative to the store.  */
  char *loader = concat (store,
                         PROGRAM_INTERPRETER + sizeof original_store);
  size_t loader_specific_argc = 8;
  size_t loader_argc = argc + loader_specific_argc;
  char *loader_argv[loader_argc + 1];
  loader_argv[0] = argv[0];
  loader_argv[1] = "--audit";
  loader_argv[2] = concat (store,
                           LOADER_AUDIT_MODULE + sizeof original_store);

  /* The audit module depends on libc.so and libgcc_s.so so honor
     AUDIT_LIBRARY_PATH.  Additionally, honor $LD_LIBRARY_PATH if set.  */
  loader_argv[3] = "--library-path";
  loader_argv[4] =
    concat_paths (getenv ("LD_LIBRARY_PATH") ?: "",
                  relocated_search_path (audit_library_path, store));

  loader_argv[5] = "--preload";
  loader_argv[6] = concat (store,
                           FAKECHROOT_LIBRARY + sizeof original_store);
  loader_argv[7] = concat (store,
                           "@WRAPPED_PROGRAM@" + sizeof original_store);

  /* Copy ARGV[1] through ARGV[ARGC]; the latter is the terminating NULL.  */
  for (size_t i = 0; i < argc; i++)
    loader_argv[i + loader_specific_argc] = argv[i + 1];

  loader_argv[loader_argc] = NULL;

  /* Set up the root directory.  The template lives on the stack so there is
     no allocation to check and nothing to free later.  */
  int err;
  char new_root[] = "/tmp/guix-exec-XXXXXX";
  if (mkdtemp (new_root) == NULL)
    {
      fprintf (stderr, "%s: error: cannot create temporary directory: %m\n",
               argv[0]);
      return;
    }

  mirror_directory ("/", new_root, make_symlink);

  /* 'mirror_directory' created a symlink for the ancestor of ORIGINAL_STORE,
     typically "/gnu".  Remove that entry so we can create NEW_STORE
     below.  */
  const char *slash = strchr (original_store + 1, '/');
  const char *top = slash != NULL
    ? strndupa (original_store, slash - original_store)
    : original_store;
  char *new_store_top = concat (new_root, top);
  unlink (new_store_top);

  /* Now create the store under NEW_ROOT.  'dirname' may modify its
     argument, hence the stack-allocated copy.  */
  char *new_store = concat (new_root, original_store);
  char *new_store_parent = dirname (strdupa (new_store));
  mkdir_p (new_store_parent);
  err = symlink (store, new_store);
  if (err < 0)
    assert_perror (errno);

#ifdef GCONV_DIRECTORY
  /* Tell libc where to find its gconv modules.  This is necessary because
     gconv uses non-interposable 'open' calls.  */
  char *gconv_path = concat (store,
                             GCONV_DIRECTORY + sizeof original_store);
  setenv ("GCONV_PATH", gconv_path, 1);
  free (gconv_path);
#endif

  setenv ("FAKECHROOT_BASE", new_root, 1);

  /* Become the new parent of grand-children when their parent dies.  */
  prctl (PR_SET_CHILD_SUBREAPER, 1);

  pid_t child = fork ();
  switch (child)
    {
    case 0:
      err = execv (loader, loader_argv);
      if (err < 0)
        assert_perror (errno);
      exit (EXIT_FAILURE);
      break;

    case -1:
      assert_perror (errno);
      exit (EXIT_FAILURE);
      break;

    default:
      {
        int status, status_other;
        waitpid (child, &status, 0);

        /* If there are child processes still running, leave NEW_ROOT around
           so they can still access it.  XXX: In that case NEW_ROOT is left
           behind.  */
        if (waitpid (-1 , &status_other, WNOHANG) == -1)
          {
            chdir ("/");                  /* avoid EBUSY */
            rm_rf (new_root);
          }

        close (2);                /* flushing stderr should be silent */

        if (WIFEXITED (status))
          exit (WEXITSTATUS (status));
        else
          /* Abnormal termination cannot really be reproduced, so exit
             with 255.  */
          exit (255);
      }
    }
}

#endif
/* Execution engines.  */

/* An execution engine: a way to run the wrapped program when the store is
   not at its expected location.  */
struct engine
{
  /* Name by which the engine is selected.  */
  const char *name;

  /* Procedure that runs the wrapped program; it is called with the actual
     store location, followed by the wrapper's ARGC and ARGV.  */
  void (* exec) (const char *, int, char **);
};
/* Make stderr fully buffered, using a static 4096-byte buffer, so that
   messages are not displayed as soon as they are written.  */
static void
buffer_stderr (void)
{
  /* Static storage: the buffer must outlive this call since the stream
     keeps using it.  */
  static char stderr_buffer[4096];

  setvbuf (stderr, stderr_buffer, _IOFBF, sizeof stderr_buffer);
}
/* The default engine: choose a robust method.  Try user namespaces first and
   fall back to PRoot, when available.  Return only if every method
   failed.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace'
     fails but 'exec_with_proot' works.  */
  buffer_stderr ();

  /* Each of these returns only upon failure.  */
  exec_in_user_namespace (store, argc, argv);
#ifdef PROOT_PROGRAM
  exec_with_proot (store, argc, argv);
#endif
}
/* The "performance" engine: choose performance over robustness.  Try user
   namespaces first and fall back to the ld.so/libfakechroot method, when
   available.  Return only if every method failed.  */
static void
exec_performance (const char *store, int argc, char *argv[])
{
  /* Hold back error messages from the first method in case the second one
     works.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);
#if HAVE_EXEC_WITH_LOADER
  exec_with_loader (store, argc, argv);
#endif
}
608 /* List of supported engines. */
609 static const struct engine engines
[] =
611 { "default", exec_default
},
612 { "performance", exec_performance
},
613 { "userns", exec_in_user_namespace
},
615 { "proot", exec_with_proot
},
617 #if HAVE_EXEC_WITH_LOADER
618 { "fakechroot", exec_with_loader
},
623 /* Return the "execution engine" to use. */
624 static const struct engine
*
625 execution_engine (void)
627 const char *str
= getenv ("GUIX_EXECUTION_ENGINE");
633 for (const struct engine
*engine
= engines
;
634 engine
->name
!= NULL
;
637 if (strcmp (engine
->name
, str
) == 0)
641 fprintf (stderr
, "%s: unsupported Guix execution engine; ignoring\n",
649 main (int argc
, char *argv
[])
653 size
= readlink ("/proc/self/exe", self
, sizeof self
- 1);
656 /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
657 want to extract "/home/ludo/.local/gnu/store". */
658 size_t index
= strlen (self
)
659 - strlen (WRAPPER_PROGRAM
) + strlen (original_store
);
660 char *store
= strdup (self
);
665 /* If STORE is already at the "right" place, we can execute
666 @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's
667 needed when running one of these wrappers from within an unshare'd
668 namespace, because 'unshare' fails with EPERM in that context. */
669 if (strcmp (store
, original_store
) != 0
670 && lstat ("@WRAPPED_PROGRAM@", &statbuf
) != 0)
672 const struct engine
*engine
= execution_engine ();
673 engine
->exec (store
, argc
, argv
);
675 /* If we reach this point, that's because ENGINE failed to do the
678 This may be because \"user namespaces\" are not supported on this system.\n\
679 Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
680 unless you move it to the '@STORE_DIRECTORY@' directory.\n\
682 Please refer to the 'guix pack' documentation for more information.\n");
686 /* The executable is available under @STORE_DIRECTORY@, so we can now
688 int err
= execv ("@WRAPPED_PROGRAM@", argv
);
690 assert_perror (errno
);