Commit | Line | Data |
---|---|---|
47a60325 | 1 | /* GNU Guix --- Functional package management for GNU |
2520059b | 2 | Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org> |
47a60325 LC |
3 | |
4 | This file is part of GNU Guix. | |
5 | ||
6 | GNU Guix is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3 of the License, or (at | |
9 | your option) any later version. | |
10 | ||
11 | GNU Guix is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | /* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate | |
20 | mount namespace where the store is mounted in its right place. | |
21 | ||
22 | We would happily do that in Scheme using 'call-with-container'. However, | |
23 | this very program needs to be relocatable, so it needs to be statically | |
24 | linked, which complicates things (Guile's modules can hardly be "linked" | |
25 | into a single executable.) */ | |
26 | ||
27 | #define _GNU_SOURCE | |
28 | #include <stdlib.h> | |
29 | #include <stdio.h> | |
30 | #include <unistd.h> | |
31 | #include <sched.h> | |
32 | #include <sys/mount.h> | |
33 | #include <errno.h> | |
34 | #include <libgen.h> | |
35 | #include <limits.h> | |
36 | #include <string.h> | |
37 | #include <assert.h> | |
38 | #include <sys/stat.h> | |
39 | #include <sys/types.h> | |
40 | #include <sys/wait.h> | |
41 | #include <fcntl.h> | |
42 | #include <dirent.h> | |
30da3173 | 43 | #include <sys/syscall.h> |
47a60325 | 44 | |
0cd13e72 LC |
/* The original store, "/gnu/store" by default.  This is the directory where
   the wrapped program expects to find the store at run time.
   NOTE(review): '@STORE_DIRECTORY@' looks like a placeholder substituted
   when this template is instantiated -- confirm against the code that
   builds the wrappers.  */
static const char original_store[] = "@STORE_DIRECTORY@";
47 | ||
48 | ||
14928af2 LC |
/* Like 'malloc', but abort if the allocation fails, so that the result is
   never NULL.  The check is explicit rather than an 'assert' so that it is
   still performed when this file is compiled with NDEBUG.  A SIZE of zero is
   rounded up to one byte because 'malloc (0)' may legitimately return
   NULL.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size != 0 ? size : 1);

  if (result == NULL)
    {
      fprintf (stderr, "error: memory exhausted\n");
      /* stderr may be fully buffered; make sure the message gets out.  */
      fflush (stderr);
      abort ();
    }

  return result;
}
57 | ||
47a60325 LC |
/* Return a newly-allocated string made of DIRECTORY, a slash, and FILE.  The
   caller is responsible for eventually freeing it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* One extra byte for the slash and one for the terminating NUL.  */
  char *joined = xmalloc (directory_length + 1 + file_length + 1);
  char *cursor = joined;

  memcpy (cursor, directory, directory_length);
  cursor += directory_length;
  *cursor++ = '/';

  /* Copy FILE along with its terminating NUL.  */
  memcpy (cursor, file, file_length + 1);

  return joined;
}
70 | ||
/* Create DIRECTORY and any missing parent directories, like 'mkdir -p'.
   Directories are created with mode 0700.  Directories that already exist
   are left untouched; any other 'mkdir' failure is reported with
   'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* "/" is the base case for absolute file names.  For relative file names
     'dirname' eventually returns "." (and 'dirname (".")' is "." again), so
     "." must terminate the recursion as well, and so must the empty
     string.  */
  if (directory[0] == '\0'
      || strcmp (directory, "/") == 0
      || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
83 | ||
/* Recursively delete DIRECTORY and everything below it, like 'rm -rf'.  Do
   nothing if DIRECTORY does not exist; report any other failure with
   'assert_perror'.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* Calling 'readdir' on a null stream would crash.  A missing directory
         is fine (there is nothing left to delete), which is consistent with
         the ENOENT tolerance for 'rmdir' below.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      int err = unlink (full);
      if (err < 0)
        {
          if (errno == EISDIR)
            /* Recurse (we expect a shallow directory structure so there's
               little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
119 | ||
/* Bind mount all the top-level entries in SOURCE to TARGET.  Symbolic links
   are not mounted: an identical link is created in TARGET instead.  For
   other entries, a mount point (a directory or an empty file) is created in
   TARGET and the entry is recursively bind-mounted on it, requesting a
   read-only mount.  Failures are reported with 'assert_perror', except for
   failures to bind-mount a non-directory, which are ignored.  */
static void
bind_mount (const char *source, const char *target)
{
  DIR *stream = opendir (source);
  if (stream == NULL)
    {
      /* Calling 'readdir' on a null stream would crash.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          char link_target[PATH_MAX];

          /* 'readlink' does not add a terminating NUL, hence the room left
             for it and the explicit termination below.  */
          ssize_t result = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (result > 0)
            {
              link_target[result] = '\0';
              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          if (entry->d_type == DT_DIR)
            {
              int err = mkdir (new_entry, 0700);
              if (err != 0)
                assert_perror (errno);
            }
          else
            {
              /* The mode argument is mandatory with O_CREAT.  */
              int fd = open (new_entry, O_WRONLY | O_CREAT, 0600);
              if (fd >= 0)
                close (fd);
            }

          int err = mount (abs_source, new_entry, "none",
                           MS_BIND | MS_REC | MS_RDONLY, NULL);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.  Failing to bind-mount a directory, on
             the other hand, is an error.  */
          if (err != 0 && entry->d_type == DT_DIR)
            assert_perror (errno);
        }

      /* Release the file names for symlinks too, not just for mounts.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
183 | ||
30da3173 LC |
/* Write a one-entry ID map to /proc/PID/FILE, where FILE is "uid_map" or
   "gid_map": ID is mapped to itself.  See user_namespaces(7).  Failures to
   open or write the file are reported with 'assert_perror'.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char path[100];
  char mapping[100];

  snprintf (path, sizeof path, "/proc/%d/%s", pid, file);

  /* A single line, "ID ID 1": a range of length one that maps ID to ID.  */
  int length = snprintf (mapping, sizeof mapping, "%d %d 1\n", id, id);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  if (write (fd, mapping, length) < 0)
    assert_perror (errno);

  close (fd);
}
206 | ||
/* Disallow setgroups(2) for PID by writing "deny" to /proc/PID/setgroups.
   Failures to open or write the file are reported with 'assert_perror'.  */
static void
disallow_setgroups (pid_t pid)
{
  static const char verdict[] = "deny";
  char path[100];

  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  /* 'sizeof verdict' is 5: the terminating NUL is written as well.  */
  if (write (fd, verdict, sizeof verdict) < 0)
    assert_perror (errno);

  close (fd);
}
225 | ||
bdb9b4e8 LC |
226 | /* Run the wrapper program in a separate mount user namespace. Return only |
227 | upon failure. */ | |
228 | static void | |
229 | exec_in_user_namespace (const char *store, int argc, char *argv[]) | |
230 | { | |
231 | /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is | |
232 | bind-mounted in the right place. */ | |
233 | int err; | |
234 | char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX")); | |
0cd13e72 | 235 | char *new_store = concat (new_root, original_store); |
bdb9b4e8 LC |
236 | char *cwd = get_current_dir_name (); |
237 | ||
238 | /* Create a child with separate namespaces and set up bind-mounts from | |
239 | there. That way, bind-mounts automatically disappear when the child | |
240 | exits, which simplifies cleanup for the parent. Note: clone is more | |
241 | convenient than fork + unshare since the parent can directly write | |
242 | the child uid_map/gid_map files. */ | |
243 | pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER, | |
244 | NULL, NULL, NULL); | |
245 | switch (child) | |
246 | { | |
247 | case 0: | |
248 | /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461> | |
249 | we cannot make NEW_ROOT a tmpfs (which would have saved the need | |
250 | for 'rm_rf'.) */ | |
251 | bind_mount ("/", new_root); | |
252 | mkdir_p (new_store); | |
253 | err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY, | |
254 | NULL); | |
255 | if (err < 0) | |
256 | assert_perror (errno); | |
257 | ||
258 | chdir (new_root); | |
259 | err = chroot (new_root); | |
260 | if (err < 0) | |
261 | assert_perror (errno); | |
262 | ||
263 | /* Change back to where we were before chroot'ing. */ | |
264 | chdir (cwd); | |
265 | ||
266 | int err = execv ("@WRAPPED_PROGRAM@", argv); | |
267 | if (err < 0) | |
268 | assert_perror (errno); | |
269 | break; | |
270 | ||
271 | case -1: | |
272 | /* Failure: user namespaces not supported. */ | |
273 | fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]); | |
274 | rm_rf (new_root); | |
275 | break; | |
276 | ||
277 | default: | |
278 | { | |
279 | /* Map the current user/group ID in the child's namespace (the | |
280 | default is to get the "overflow UID", i.e., the UID of | |
281 | "nobody"). We must first disallow 'setgroups' for that | |
282 | process. */ | |
283 | disallow_setgroups (child); | |
284 | write_id_map (child, "uid_map", getuid ()); | |
285 | write_id_map (child, "gid_map", getgid ()); | |
286 | ||
287 | int status; | |
288 | waitpid (child, &status, 0); | |
289 | chdir ("/"); /* avoid EBUSY */ | |
290 | rm_rf (new_root); | |
291 | free (new_root); | |
292 | ||
293 | if (WIFEXITED (status)) | |
294 | exit (WEXITSTATUS (status)); | |
295 | else | |
296 | /* Abnormal termination cannot really be reproduced, so exit | |
297 | with 255. */ | |
298 | exit (255); | |
299 | } | |
300 | } | |
301 | } | |
302 | ||
47a60325 | 303 | \f |
99aec37a LC |
#ifdef PROOT_PROGRAM

/* Run the wrapped program under PRoot, giving it the arguments found in ARGV
   after ARGV[0], with STORE "bind-mounted" at ORIGINAL_STORE by means of
   PRoot's '-b' option.  The PRoot executable is PROOT_PROGRAM, looked up
   under STORE.  Return only if 'execv' failed.  */
static void
exec_with_proot (const char *store, int argc, char *argv[])
{
  /* Number of leading arguments: the PRoot executable, "-b", the bind
     specification, and the wrapped program.  */
  enum { fixed_args = 4 };

  char *proot_argv[fixed_args + argc + 1];

  /* "STORE:ORIGINAL_STORE"; 'sizeof original_store' accounts for the
     terminating NUL.  */
  char bind_spec[strlen (store) + 1 + sizeof original_store];
  snprintf (bind_spec, sizeof bind_spec, "%s:%s", store, original_store);

  char *proot = concat (store, PROOT_PROGRAM);

  proot_argv[0] = proot;
  proot_argv[1] = "-b";
  proot_argv[2] = bind_spec;
  proot_argv[3] = "@WRAPPED_PROGRAM@";

  /* Append ARGV[1] through ARGV[ARGC], the latter being the NULL that ends
     the vector when ARGV is the one 'main' received.  */
  char **destination = proot_argv + fixed_args;
  for (int i = 1; i <= argc; i++)
    *destination++ = argv[i];

  *destination = NULL;

  /* Seccomp support seems to invariably lead to segfaults; disable it by
     default.  The last argument is zero so that a value already present in
     the environment is preserved.  */
  setenv ("PROOT_NO_SECCOMP", "1", 0);

  if (execv (proot, proot_argv) < 0)
    assert_perror (errno);
}

#endif
342 | ||
343 | \f | |
fde2aec3 LC |
/* Execution engines.  */

/* An execution engine: a named way of running the wrapped program.  */
struct engine
{
  /* Name of the engine, matched against the value of the
     'GUIX_EXECUTION_ENGINE' environment variable.  */
  const char *name;

  /* Procedure that runs the wrapped program; it is called with the actual
     store location followed by the ARGC and ARGV that 'main' received.  */
  void (*exec) (const char *store, int argc, char **argv);
};
351 | ||
/* Make stderr fully buffered, using a statically-allocated buffer of 4096
   bytes.  */
static void
buffer_stderr (void)
{
  enum { buffer_size = 4096 };
  static char buffer[buffer_size];

  setvbuf (stderr, buffer, _IOFBF, buffer_size);
}
358 | ||
359 | /* The default engine. */ | |
360 | static void | |
361 | exec_default (const char *store, int argc, char *argv[]) | |
362 | { | |
363 | /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace' | |
364 | fails but 'exec_with_proot' works. */ | |
365 | buffer_stderr (); | |
366 | ||
367 | exec_in_user_namespace (store, argc, argv); | |
368 | #ifdef PROOT_PROGRAM | |
369 | exec_with_proot (store, argc, argv); | |
370 | #endif | |
371 | } | |
372 | ||
373 | /* List of supported engines. */ | |
374 | static const struct engine engines[] = | |
375 | { | |
376 | { "default", exec_default }, | |
377 | { "userns", exec_in_user_namespace }, | |
378 | #ifdef PROOT_PROGRAM | |
379 | { "proot", exec_with_proot }, | |
380 | #endif | |
381 | { NULL, NULL } | |
382 | }; | |
383 | ||
384 | /* Return the "execution engine" to use. */ | |
385 | static const struct engine * | |
386 | execution_engine (void) | |
387 | { | |
388 | const char *str = getenv ("GUIX_EXECUTION_ENGINE"); | |
389 | ||
390 | if (str == NULL) | |
391 | str = "default"; | |
392 | ||
393 | try: | |
394 | for (const struct engine *engine = engines; | |
395 | engine->name != NULL; | |
396 | engine++) | |
397 | { | |
398 | if (strcmp (engine->name, str) == 0) | |
399 | return engine; | |
400 | } | |
401 | ||
402 | fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n", | |
403 | str); | |
404 | str = "default"; | |
405 | goto try; | |
406 | } | |
407 | ||
408 | \f | |
47a60325 LC |
409 | int |
410 | main (int argc, char *argv[]) | |
411 | { | |
412 | ssize_t size; | |
413 | char self[PATH_MAX]; | |
414 | size = readlink ("/proc/self/exe", self, sizeof self - 1); | |
415 | assert (size > 0); | |
416 | ||
417 | /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we | |
418 | want to extract "/home/ludo/.local/gnu/store". */ | |
419 | size_t index = strlen (self) | |
0cd13e72 | 420 | - strlen ("@WRAPPED_PROGRAM@") + strlen (original_store); |
47a60325 LC |
421 | char *store = strdup (self); |
422 | store[index] = '\0'; | |
423 | ||
424 | struct stat statbuf; | |
425 | ||
426 | /* If STORE is already at the "right" place, we can execute | |
427 | @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's | |
428 | needed when running one of these wrappers from within an unshare'd | |
429 | namespace, because 'unshare' fails with EPERM in that context. */ | |
0cd13e72 | 430 | if (strcmp (store, original_store) != 0 |
47a60325 LC |
431 | && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0) |
432 | { | |
fde2aec3 LC |
433 | const struct engine *engine = execution_engine (); |
434 | engine->exec (store, argc, argv); | |
47a60325 | 435 | |
fde2aec3 LC |
436 | /* If we reach this point, that's because ENGINE failed to do the |
437 | job. */ | |
bdb9b4e8 | 438 | fprintf (stderr, "\ |
30da3173 LC |
439 | This may be because \"user namespaces\" are not supported on this system.\n\ |
440 | Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\ | |
441 | unless you move it to the '@STORE_DIRECTORY@' directory.\n\ | |
442 | \n\ | |
443 | Please refer to the 'guix pack' documentation for more information.\n"); | |
bdb9b4e8 | 444 | return EXIT_FAILURE; |
47a60325 LC |
445 | } |
446 | ||
447 | /* The executable is available under @STORE_DIRECTORY@, so we can now | |
448 | execute it. */ | |
449 | int err = execv ("@WRAPPED_PROGRAM@", argv); | |
450 | if (err < 0) | |
451 | assert_perror (errno); | |
452 | ||
453 | return EXIT_FAILURE; | |
454 | } |