/* GNU Guix --- Functional package management for GNU
   Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>

   This file is part of GNU Guix.

   GNU Guix is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or (at
   your option) any later version.

   GNU Guix is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.  */

/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
   mount namespace where the store is mounted in its right place.

   We would happily do that in Scheme using 'call-with-container'.  However,
   this very program needs to be relocatable, so it needs to be statically
   linked, which complicates things (Guile's modules can hardly be "linked"
   into a single executable.)  */

/* Needed for 'strdupa', 'assert_perror', 'get_current_dir_name', the
   CLONE_* flags, and the '%m' format directive.  */
#ifndef _GNU_SOURCE
# define _GNU_SOURCE
#endif

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <sys/wait.h>

/* The original store, "/gnu/store" by default.  The placeholder is
   substituted when this template is instantiated.  */
static const char original_store[] = "@STORE_DIRECTORY@";

/* Like 'malloc', but abort if 'malloc' returns NULL.  The check is spelled
   out rather than written as an 'assert' so that it is still performed when
   NDEBUG is defined.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);

  if (result == NULL)
    abort ();

  return result;
}

/* Concatenate DIRECTORY, a slash, and FILE.  Return the result, which the
   caller must eventually free.  The slash is inserted unconditionally, even
   when DIRECTORY ends with one or FILE starts with one.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* Room for both strings, the separating slash, and the trailing NUL.  */
  char *result = xmalloc (directory_length + 1 + file_length + 1);

  memcpy (result, directory, directory_length);
  result[directory_length] = '/';

  /* Copy FILE together with its terminating NUL.  */
  memcpy (result + directory_length + 1, file, file_length + 1);

  return result;
}

/* Create DIRECTORY and its missing parent directories, each with mode 0700.
   Directories that already exist are left alone; any other 'mkdir' failure
   is reported through 'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* "/" ends the recursion for absolute names.  "." does so for relative
     names: 'dirname' maps "." to itself, so without this test a relative
     DIRECTORY would recurse forever.  */
  if (strcmp (directory, "/") == 0 || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));

  /* Guard against any other name that 'dirname' leaves unchanged.  */
  if (strcmp (parent, directory) != 0)
    mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}

/* Remove DIRECTORY and everything it contains.  A DIRECTORY that does not
   exist is not an error; other failures are reported through
   'assert_perror'.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);

  if (stream == NULL)
    {
      /* Nothing to remove if DIRECTORY is already gone; this mirrors the
         ENOENT tolerance of the final 'rmdir' below.  Never hand a NULL
         stream to 'readdir'.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  struct dirent *entry;

  while ((entry = readdir (stream)) != NULL)
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      /* Try to remove the entry as a non-directory first.  */
      if (unlink (full) < 0)
        {
          if (errno == EISDIR)
            /* Recurse (we expect a shallow directory structure so there's
               little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}

/* Bind mount all the top-level entries in SOURCE to TARGET.  Symbolic links
   are not mounted: a link with the same contents is created in TARGET
   instead.  For every other entry a mount point of the matching kind
   (directory or plain file) is created under TARGET first.  */
static void
bind_mount (const char *source, const char *target)
{
  DIR *stream = opendir (source);

  if (stream == NULL)
    {
      /* Never hand a NULL stream to 'readdir'.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Named so that it does not shadow the TARGET parameter.  */
          char link_target[PATH_MAX];

          /* 'readlink' does not NUL-terminate: keep one byte for that.  */
          ssize_t length = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (length > 0)
            {
              link_target[length] = '\0';
              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          if (entry->d_type == DT_DIR)
            {
              int err = mkdir (new_entry, 0700);
              if (err != 0)
                assert_perror (errno);
            }
          else
            {
              /* 'open' requires a mode argument whenever O_CREAT is given.
                 Failure to create the file shows up as a 'mount' failure
                 below.  */
              int fd = open (new_entry, O_WRONLY | O_CREAT, 0600);
              if (fd >= 0)
                close (fd);
            }

          int err = mount (abs_source, new_entry, "none",
                           MS_BIND | MS_REC | MS_RDONLY, NULL);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.

             NOTE(review): the condition below reports failures for
             NON-directory entries and ignores failures for directories,
             which looks like the opposite of what the paragraph above
             describes.  Behavior is left unchanged here; confirm the intent
             before flipping the test.  */
          if (err != 0 && entry->d_type != DT_DIR)
            assert_perror (errno);
        }

      /* Both names are heap-allocated by 'concat', whatever the entry
         type.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}

/* Write the user/group ID map for PID to FILE, mapping ID to itself.  See
   user_namespaces(7).  FILE is the name of the map file under /proc/PID,
   for instance "uid_map" or "gid_map".  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char id_map_file[100];
  snprintf (id_map_file, sizeof id_map_file, "/proc/%d/%s", (int) pid, file);

  char id_map[100];

  /* A single-entry map: ID inside the namespace is ID outside of it.  */
  int len = snprintf (id_map, sizeof id_map, "%d %d 1\n", id, id);

  int fd = open (id_map_file, O_WRONLY);
  if (fd < 0)
    {
      assert_perror (errno);
      /* Reached only when assertions are compiled out: do not go on to use
         an invalid descriptor.  */
      return;
    }

  ssize_t n = write (fd, id_map, len);
  if (n < 0)
    assert_perror (errno);

  close (fd);
}

/* Disallow setgroups(2) for PID by writing "deny" to /proc/PID/setgroups.
   Failures are reported through 'assert_perror'.  */
static void
disallow_setgroups (pid_t pid)
{
  static const char deny[] = "deny";
  char file[100];

  snprintf (file, sizeof file, "/proc/%d/setgroups", (int) pid);

  int fd = open (file, O_WRONLY);
  if (fd < 0)
    {
      assert_perror (errno);
      /* Reached only when assertions are compiled out: do not go on to use
         an invalid descriptor.  */
      return;
    }

  /* Write the four characters only, not the terminating NUL.  */
  ssize_t err = write (fd, deny, sizeof deny - 1);
  if (err < 0)
    assert_perror (errno);

  close (fd);
}

226 /* Run the wrapper program in a separate mount user namespace. Return only
229 exec_in_user_namespace (const char *store
, int argc
, char *argv
[])
231 /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
232 bind-mounted in the right place. */
234 char *new_root
= mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
235 char *new_store
= concat (new_root
, original_store
);
236 char *cwd
= get_current_dir_name ();
238 /* Create a child with separate namespaces and set up bind-mounts from
239 there. That way, bind-mounts automatically disappear when the child
240 exits, which simplifies cleanup for the parent. Note: clone is more
241 convenient than fork + unshare since the parent can directly write
242 the child uid_map/gid_map files. */
243 pid_t child
= syscall (SYS_clone
, SIGCHLD
| CLONE_NEWNS
| CLONE_NEWUSER
,
248 /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
249 we cannot make NEW_ROOT a tmpfs (which would have saved the need
251 bind_mount ("/", new_root
);
253 err
= mount (store
, new_store
, "none", MS_BIND
| MS_REC
| MS_RDONLY
,
256 assert_perror (errno
);
259 err
= chroot (new_root
);
261 assert_perror (errno
);
263 /* Change back to where we were before chroot'ing. */
266 int err
= execv ("@WRAPPED_PROGRAM@", argv
);
268 assert_perror (errno
);
272 /* Failure: user namespaces not supported. */
273 fprintf (stderr
, "%s: error: 'clone' failed: %m\n", argv
[0]);
279 /* Map the current user/group ID in the child's namespace (the
280 default is to get the "overflow UID", i.e., the UID of
281 "nobody"). We must first disallow 'setgroups' for that
283 disallow_setgroups (child
);
284 write_id_map (child
, "uid_map", getuid ());
285 write_id_map (child
, "gid_map", getgid ());
288 waitpid (child
, &status
, 0);
289 chdir ("/"); /* avoid EBUSY */
293 if (WIFEXITED (status
))
294 exit (WEXITSTATUS (status
));
296 /* Abnormal termination cannot really be reproduced, so exit
/* PROOT_PROGRAM is not defined anywhere in this file; it is expected to come
   from the build.  Everything PRoot-related is compiled only when it is
   provided.  */
#ifdef PROOT_PROGRAM

/* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and
   "bind-mounting" STORE in the right place.  PROOT_PROGRAM is looked up
   relative to STORE.  Return only if PRoot could not be executed.  */
static void
exec_with_proot (const char *store, int argc, char *argv[])
{
  /* PRoot's own leading arguments: its name, "-b", the bind specification,
     and the program to run.  */
  int proot_specific_argc = 4;
  int proot_argc = argc + proot_specific_argc;
  char *proot_argv[proot_argc + 1], *proot;

  /* "STORE:ORIGINAL_STORE"; 'sizeof' accounts for the trailing NUL.  */
  char bind_spec[strlen (store) + 1 + sizeof original_store];

  strcpy (bind_spec, store);
  strcat (bind_spec, ":");
  strcat (bind_spec, original_store);

  proot = concat (store, PROOT_PROGRAM);

  proot_argv[0] = proot;
  proot_argv[1] = "-b";
  proot_argv[2] = bind_spec;
  proot_argv[3] = "@WRAPPED_PROGRAM@";

  /* Copy ARGV[1] through ARGV[ARGC]: the user's arguments followed by
     ARGV's own terminating NULL.  */
  for (int i = 0; i < argc; i++)
    proot_argv[i + proot_specific_argc] = argv[i + 1];

  proot_argv[proot_argc] = NULL;

  /* Seccomp support seems to invariably lead to segfaults; disable it by
     default.  The last argument leaves a user-provided value alone.  */
  setenv ("PROOT_NO_SECCOMP", "1", 0);

  int err = execv (proot, proot_argv);
  if (err < 0)
    assert_perror (errno);

  /* Only reached when 'execv' failed.  */
  free (proot);
}

#endif /* PROOT_PROGRAM */

/* Execution engines.  */

/* An execution engine: NAME is the string that selects it, and EXEC is the
   procedure that runs the wrapped program.  EXEC is called with the actual
   store location followed by the program's argument count and vector.  */
struct engine
{
  const char *name;
  void (* exec) (const char *, int, char **);
};

/* Make stderr fully buffered, using a static 4 KiB buffer.  */
static void
buffer_stderr (void)
{
  static char stderr_buffer[4096];
  setvbuf (stderr, stderr_buffer, _IOFBF, sizeof stderr_buffer);
}

/* The default engine: try user namespaces first and, when PRoot support is
   compiled in, fall back to PRoot.  Return only if every attempt failed.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace'
     fails but 'exec_with_proot' works.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);
#ifdef PROOT_PROGRAM
  exec_with_proot (store, argc, argv);
#endif
}

373 /* List of supported engines. */
374 static const struct engine engines
[] =
376 { "default", exec_default
},
377 { "userns", exec_in_user_namespace
},
379 { "proot", exec_with_proot
},
384 /* Return the "execution engine" to use. */
385 static const struct engine
*
386 execution_engine (void)
388 const char *str
= getenv ("GUIX_EXECUTION_ENGINE");
394 for (const struct engine
*engine
= engines
;
395 engine
->name
!= NULL
;
398 if (strcmp (engine
->name
, str
) == 0)
402 fprintf (stderr
, "%s: unsupported Guix execution engine; ignoring\n",
410 main (int argc
, char *argv
[])
414 size
= readlink ("/proc/self/exe", self
, sizeof self
- 1);
417 /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
418 want to extract "/home/ludo/.local/gnu/store". */
419 size_t index
= strlen (self
)
420 - strlen ("@WRAPPED_PROGRAM@") + strlen (original_store
);
421 char *store
= strdup (self
);
426 /* If STORE is already at the "right" place, we can execute
427 @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's
428 needed when running one of these wrappers from within an unshare'd
429 namespace, because 'unshare' fails with EPERM in that context. */
430 if (strcmp (store
, original_store
) != 0
431 && lstat ("@WRAPPED_PROGRAM@", &statbuf
) != 0)
433 const struct engine
*engine
= execution_engine ();
434 engine
->exec (store
, argc
, argv
);
436 /* If we reach this point, that's because ENGINE failed to do the
439 This may be because \"user namespaces\" are not supported on this system.\n\
440 Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
441 unless you move it to the '@STORE_DIRECTORY@' directory.\n\
443 Please refer to the 'guix pack' documentation for more information.\n");
447 /* The executable is available under @STORE_DIRECTORY@, so we can now
449 int err
= execv ("@WRAPPED_PROGRAM@", argv
);
451 assert_perror (errno
);