Commit | Line | Data |
---|---|---|
47a60325 | 1 | /* GNU Guix --- Functional package management for GNU |
2520059b | 2 | Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org> |
47a60325 LC |
3 | |
4 | This file is part of GNU Guix. | |
5 | ||
6 | GNU Guix is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3 of the License, or (at | |
9 | your option) any later version. | |
10 | ||
11 | GNU Guix is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | /* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate | |
20 | mount namespace where the store is mounted in its right place. | |
21 | ||
22 | We would happily do that in Scheme using 'call-with-container'. However, | |
23 | this very program needs to be relocatable, so it needs to be statically | |
24 | linked, which complicates things (Guile's modules can hardly be "linked" | |
25 | into a single executable.) */ | |
26 | ||
27 | #define _GNU_SOURCE | |
28 | #include <stdlib.h> | |
29 | #include <stdio.h> | |
30 | #include <unistd.h> | |
31 | #include <sched.h> | |
32 | #include <sys/mount.h> | |
33 | #include <errno.h> | |
34 | #include <libgen.h> | |
35 | #include <limits.h> | |
36 | #include <string.h> | |
37 | #include <assert.h> | |
38 | #include <sys/stat.h> | |
39 | #include <sys/types.h> | |
40 | #include <sys/wait.h> | |
41 | #include <fcntl.h> | |
42 | #include <dirent.h> | |
30da3173 | 43 | #include <sys/syscall.h> |
47a60325 | 44 | |
64562321 LC |
/* Whether we're building the ld.so/libfakechroot wrapper: 1 when
   PROGRAM_INTERPRETER, LOADER_AUDIT_MODULE, and FAKECHROOT_LIBRARY are all
   defined, 0 otherwise.

   The value is computed here instead of having the macro expand to a
   'defined ...' expression: a 'defined' operator that results from macro
   expansion within '#if' has undefined behavior in ISO C.  */
#if (defined PROGRAM_INTERPRETER) && (defined LOADER_AUDIT_MODULE) \
  && (defined FAKECHROOT_LIBRARY)
# define HAVE_EXEC_WITH_LOADER 1
#else
# define HAVE_EXEC_WITH_LOADER 0
#endif

/* The original store, "/gnu/store" by default.  */
static const char original_store[] = "@STORE_DIRECTORY@";
52 | ||
53 | ||
14928af2 LC |
/* Like 'malloc', but never return NULL: print a message and abort if the
   allocation fails.  The check is explicit rather than an 'assert' so that
   it stays in place when building with NDEBUG.  A SIZE of zero yields a
   valid one-byte allocation, so the result can always be passed to
   'free'.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size != 0 ? size : 1);
  if (result == NULL)
    {
      fprintf (stderr, "failed to allocate %zu bytes\n", size);
      abort ();
    }

  return result;
}
62 | ||
47a60325 LC |
/* Return a newly-allocated string made of DIRECTORY, a slash, and FILE, in
   that order.  The caller owns the result and must eventually free it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* Room for both parts, the separating slash, and the trailing NUL.  */
  char *path = xmalloc (directory_length + 2 + file_length);

  memcpy (path, directory, directory_length);
  path[directory_length] = '/';

  /* Copy FILE together with its terminating NUL.  */
  memcpy (path + directory_length + 1, file, file_length + 1);
  return path;
}
75 | ||
/* Create DIRECTORY along with any missing parent directories, each with
   mode 0700.  Components that already exist are left untouched.  Abort (via
   'assert_perror') on any 'mkdir' error other than EEXIST.  */
static void
mkdir_p (const char *directory)
{
  /* "/" ends the recursion for absolute names.  'dirname' returns "." for a
     name without any slash (and for the empty string), so stop there as
     well; otherwise a relative DIRECTORY would recurse forever.  */
  if (directory[0] == '\0'
      || strcmp (directory, "/") == 0
      || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
88 | ||
/* Recursively delete DIRECTORY and everything beneath it.  Do nothing if
   DIRECTORY does not exist.  Abort (via 'assert_perror') on unexpected
   errors.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* A missing DIRECTORY is fine, consistent with how ENOENT from
         'rmdir' is treated below; anything else is unexpected.  Either
         way, 'readdir' must not be called on a null stream.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      int err = unlink (full);
      if (err < 0)
        {
          if (errno == EISDIR)
            /* Recurse (we expect a shallow directory structure so there's
               little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
124 | ||
64562321 LC |
/* Make TARGET a bind-mount of SOURCE.  ENTRY is the directory entry that
   corresponds to SOURCE; its type determines whether the mount point is
   created as a directory or as a regular file.  Return zero on success; on
   failure, return the non-zero result of the failing 'mkdir' or 'mount'
   call.  */
static int
bind_mount (const char *source, const struct dirent *entry,
            const char *target)
{
  if (entry->d_type == DT_DIR)
    {
      int err = mkdir (target, 0700);
      if (err != 0)
        return err;
    }
  else
    {
      /* 'open' requires a mode argument when O_CREAT is given; without it
         the new file gets arbitrary permission bits.  Only close the
         descriptor if one was actually obtained.  If the file could not be
         created, 'mount' below is still attempted and reports the
         failure.  */
      int fd = open (target, O_WRONLY | O_CREAT, 0600);
      if (fd >= 0)
        close (fd);
    }

  return mount (source, target, "none",
                MS_BIND | MS_REC | MS_RDONLY, NULL);
}
143 | ||
144 | #if HAVE_EXEC_WITH_LOADER | |
145 | ||
/* Create TARGET as a symbolic link pointing to SOURCE and return the result
   of 'symlink'.  ENTRY is unused; it is only there so that this function
   has the signature expected by 'mirror_directory'.  */
static int
make_symlink (const char *source, const struct dirent *entry,
              const char *target)
{
  (void) entry;

  int status = symlink (source, target);
  return status;
}
153 | ||
154 | #endif | |
155 | ||
/* Mirror all the top-level entries of SOURCE into TARGET.  Symbolic links
   are recreated as symbolic links with the same contents; every other entry
   is passed to FIRMLINK, which is called with the entry's name under
   SOURCE, the entry itself, and the name to create under TARGET, and which
   must return zero on success.  */
static void
mirror_directory (const char *source, const char *target,
                  int (* firmlink) (const char *, const struct dirent *,
                                    const char *))
{
  DIR *stream = opendir (source);
  if (stream == NULL)
    {
      /* 'readdir' must not be called on a null stream.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Named so as not to shadow the TARGET parameter.  */
          char link_target[PATH_MAX];

          /* 'readlink' does not NUL-terminate, hence the room kept for the
             terminator.  Links that cannot be read are skipped.  */
          ssize_t result = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (result > 0)
            {
              link_target[result] = '\0';
              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          int err = firmlink (abs_source, entry, new_entry);

          /* A failure is fatal for non-directory entries only; failures on
             directories are ignored.
             NOTE(review): the earlier comment here stated the opposite
             (keep going when a non-directory entry fails) -- confirm which
             of the two is intended.  */
          if (err != 0 && entry->d_type != DT_DIR)
            assert_perror (errno);
        }

      /* Release the names on both branches (they used to leak for
         symbolic links).  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
211 | ||
30da3173 LC |
/* Write to /proc/PID/FILE a single-line map that maps ID to itself, with a
   range of one.  FILE is expected to be "uid_map" or "gid_map"; see
   user_namespaces(7).  Abort (via 'assert_perror') if the file cannot be
   opened or written.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char path[100];
  char mapping[100];

  snprintf (path, sizeof path, "/proc/%d/%s", pid, file);

  /* One line: first ID inside the namespace, first ID outside, count.  */
  int mapping_length = snprintf (mapping, sizeof mapping,
                                 "%d %d 1\n", id, id);

  int map_fd = open (path, O_WRONLY);
  if (map_fd < 0)
    assert_perror (errno);

  int written = write (map_fd, mapping, mapping_length);
  if (written < 0)
    assert_perror (errno);

  close (map_fd);
}
234 | ||
/* Forbid setgroups(2) for process PID by writing "deny" to
   /proc/PID/setgroups.  Abort (via 'assert_perror') if that file cannot be
   opened or written.  */
static void
disallow_setgroups (pid_t pid)
{
  /* The terminating NUL is written along with the word itself.  */
  static const char verdict[] = "deny";
  char path[100];

  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);

  int setgroups_fd = open (path, O_WRONLY);
  if (setgroups_fd < 0)
    assert_perror (errno);

  int written = write (setgroups_fd, verdict, sizeof verdict);
  if (written < 0)
    assert_perror (errno);

  close (setgroups_fd);
}
253 | ||
bdb9b4e8 LC |
254 | /* Run the wrapper program in a separate mount user namespace. Return only |
255 | upon failure. */ | |
256 | static void | |
257 | exec_in_user_namespace (const char *store, int argc, char *argv[]) | |
258 | { | |
259 | /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is | |
260 | bind-mounted in the right place. */ | |
261 | int err; | |
262 | char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX")); | |
0cd13e72 | 263 | char *new_store = concat (new_root, original_store); |
bdb9b4e8 LC |
264 | char *cwd = get_current_dir_name (); |
265 | ||
266 | /* Create a child with separate namespaces and set up bind-mounts from | |
267 | there. That way, bind-mounts automatically disappear when the child | |
268 | exits, which simplifies cleanup for the parent. Note: clone is more | |
269 | convenient than fork + unshare since the parent can directly write | |
270 | the child uid_map/gid_map files. */ | |
271 | pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER, | |
272 | NULL, NULL, NULL); | |
273 | switch (child) | |
274 | { | |
275 | case 0: | |
276 | /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461> | |
277 | we cannot make NEW_ROOT a tmpfs (which would have saved the need | |
278 | for 'rm_rf'.) */ | |
64562321 | 279 | mirror_directory ("/", new_root, bind_mount); |
bdb9b4e8 LC |
280 | mkdir_p (new_store); |
281 | err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY, | |
282 | NULL); | |
283 | if (err < 0) | |
284 | assert_perror (errno); | |
285 | ||
286 | chdir (new_root); | |
287 | err = chroot (new_root); | |
288 | if (err < 0) | |
289 | assert_perror (errno); | |
290 | ||
291 | /* Change back to where we were before chroot'ing. */ | |
292 | chdir (cwd); | |
293 | ||
294 | int err = execv ("@WRAPPED_PROGRAM@", argv); | |
295 | if (err < 0) | |
296 | assert_perror (errno); | |
297 | break; | |
298 | ||
299 | case -1: | |
300 | /* Failure: user namespaces not supported. */ | |
301 | fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]); | |
302 | rm_rf (new_root); | |
303 | break; | |
304 | ||
305 | default: | |
306 | { | |
307 | /* Map the current user/group ID in the child's namespace (the | |
308 | default is to get the "overflow UID", i.e., the UID of | |
309 | "nobody"). We must first disallow 'setgroups' for that | |
310 | process. */ | |
311 | disallow_setgroups (child); | |
312 | write_id_map (child, "uid_map", getuid ()); | |
313 | write_id_map (child, "gid_map", getgid ()); | |
314 | ||
315 | int status; | |
316 | waitpid (child, &status, 0); | |
317 | chdir ("/"); /* avoid EBUSY */ | |
318 | rm_rf (new_root); | |
319 | free (new_root); | |
320 | ||
321 | if (WIFEXITED (status)) | |
322 | exit (WEXITSTATUS (status)); | |
323 | else | |
324 | /* Abnormal termination cannot really be reproduced, so exit | |
325 | with 255. */ | |
326 | exit (255); | |
327 | } | |
328 | } | |
329 | } | |
330 | ||
47a60325 | 331 | \f |
99aec37a LC |
332 | #ifdef PROOT_PROGRAM |
333 | ||
334 | /* Execute the wrapped program with PRoot, passing it ARGC and ARGV, and | |
335 | "bind-mounting" STORE in the right place. */ | |
336 | static void | |
337 | exec_with_proot (const char *store, int argc, char *argv[]) | |
338 | { | |
339 | int proot_specific_argc = 4; | |
340 | int proot_argc = argc + proot_specific_argc; | |
4a53c19a | 341 | char *proot_argv[proot_argc + 1], *proot; |
0cd13e72 | 342 | char bind_spec[strlen (store) + 1 + sizeof original_store]; |
99aec37a LC |
343 | |
344 | strcpy (bind_spec, store); | |
345 | strcat (bind_spec, ":"); | |
0cd13e72 | 346 | strcat (bind_spec, original_store); |
99aec37a LC |
347 | |
348 | proot = concat (store, PROOT_PROGRAM); | |
349 | ||
350 | proot_argv[0] = proot; | |
351 | proot_argv[1] = "-b"; | |
352 | proot_argv[2] = bind_spec; | |
353 | proot_argv[3] = "@WRAPPED_PROGRAM@"; | |
354 | ||
355 | for (int i = 0; i < argc; i++) | |
356 | proot_argv[i + proot_specific_argc] = argv[i + 1]; | |
357 | ||
358 | proot_argv[proot_argc] = NULL; | |
359 | ||
360 | /* Seccomp support seems to invariably lead to segfaults; disable it by | |
361 | default. */ | |
362 | setenv ("PROOT_NO_SECCOMP", "1", 0); | |
363 | ||
364 | int err = execv (proot, proot_argv); | |
365 | if (err < 0) | |
366 | assert_perror (errno); | |
367 | } | |
368 | ||
369 | #endif | |
370 | ||
371 | \f | |
64562321 LC |
372 | #if HAVE_EXEC_WITH_LOADER |
373 | ||
374 | /* Execute the wrapped program by invoking the loader (ld.so) directly, | |
375 | passing it the audit module and preloading libfakechroot.so. */ | |
376 | static void | |
377 | exec_with_loader (const char *store, int argc, char *argv[]) | |
378 | { | |
379 | char *loader = concat (store, | |
380 | PROGRAM_INTERPRETER + sizeof original_store); | |
381 | size_t loader_specific_argc = 6; | |
382 | size_t loader_argc = argc + loader_specific_argc; | |
383 | char *loader_argv[loader_argc + 1]; | |
384 | loader_argv[0] = argv[0]; | |
385 | loader_argv[1] = "--audit"; | |
386 | loader_argv[2] = concat (store, | |
387 | LOADER_AUDIT_MODULE + sizeof original_store); | |
388 | loader_argv[3] = "--preload"; | |
389 | loader_argv[4] = concat (store, | |
390 | FAKECHROOT_LIBRARY + sizeof original_store); | |
391 | loader_argv[5] = concat (store, | |
392 | "@WRAPPED_PROGRAM@" + sizeof original_store); | |
393 | ||
394 | for (size_t i = 0; i < argc; i++) | |
395 | loader_argv[i + loader_specific_argc] = argv[i + 1]; | |
396 | ||
397 | loader_argv[loader_argc] = NULL; | |
398 | ||
399 | /* Set up the root directory. */ | |
400 | int err; | |
401 | char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX")); | |
402 | mirror_directory ("/", new_root, make_symlink); | |
403 | ||
404 | char *new_store = concat (new_root, original_store); | |
405 | char *new_store_parent = dirname (strdup (new_store)); | |
406 | mkdir_p (new_store_parent); | |
407 | symlink (store, new_store); | |
408 | ||
409 | #ifdef GCONV_DIRECTORY | |
410 | /* Tell libc where to find its gconv modules. This is necessary because | |
411 | gconv uses non-interposable 'open' calls. */ | |
412 | char *gconv_path = concat (store, | |
413 | GCONV_DIRECTORY + sizeof original_store); | |
414 | setenv ("GCONV_PATH", gconv_path, 1); | |
415 | free (gconv_path); | |
416 | #endif | |
417 | ||
418 | setenv ("FAKECHROOT_BASE", new_root, 1); | |
419 | ||
420 | pid_t child = fork (); | |
421 | switch (child) | |
422 | { | |
423 | case 0: | |
424 | err = execv (loader, loader_argv); | |
425 | if (err < 0) | |
426 | assert_perror (errno); | |
427 | exit (EXIT_FAILURE); | |
428 | break; | |
429 | ||
430 | case -1: | |
431 | assert_perror (errno); | |
432 | exit (EXIT_FAILURE); | |
433 | break; | |
434 | ||
435 | default: | |
436 | { | |
437 | int status; | |
438 | waitpid (child, &status, 0); | |
439 | chdir ("/"); /* avoid EBUSY */ | |
440 | rm_rf (new_root); | |
441 | free (new_root); | |
442 | ||
443 | close (2); /* flushing stderr should be silent */ | |
444 | ||
445 | if (WIFEXITED (status)) | |
446 | exit (WEXITSTATUS (status)); | |
447 | else | |
448 | /* Abnormal termination cannot really be reproduced, so exit | |
449 | with 255. */ | |
450 | exit (255); | |
451 | } | |
452 | } | |
453 | } | |
454 | ||
455 | #endif | |
456 | ||
457 | \f | |
fde2aec3 LC |
/* Execution engines.  */

/* An execution engine: a NAME by which it can be selected, and the EXEC
   procedure that implements it.  EXEC is called with the store location
   followed by the program's argument count and argument vector.  */
struct engine
{
  const char *name;
  void (* exec) (const char *store, int argc, char **argv);
};
465 | ||
/* Make 'stderr' fully buffered, using a statically-allocated 4 KiB
   buffer.  */
static void
buffer_stderr (void)
{
  enum { stderr_buffer_size = 4096 };
  static char storage[stderr_buffer_size];

  setvbuf (stderr, storage, _IOFBF, stderr_buffer_size);
}
472 | ||
64562321 | 473 | /* The default engine: choose a robust method. */ |
fde2aec3 LC |
474 | static void |
475 | exec_default (const char *store, int argc, char *argv[]) | |
476 | { | |
477 | /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace' | |
478 | fails but 'exec_with_proot' works. */ | |
479 | buffer_stderr (); | |
480 | ||
481 | exec_in_user_namespace (store, argc, argv); | |
482 | #ifdef PROOT_PROGRAM | |
483 | exec_with_proot (store, argc, argv); | |
484 | #endif | |
485 | } | |
486 | ||
64562321 LC |
487 | /* The "performance" engine: choose performance over robustness. */ |
488 | static void | |
489 | exec_performance (const char *store, int argc, char *argv[]) | |
490 | { | |
491 | buffer_stderr (); | |
492 | ||
493 | exec_in_user_namespace (store, argc, argv); | |
494 | #if HAVE_EXEC_WITH_LOADER | |
495 | exec_with_loader (store, argc, argv); | |
496 | #endif | |
497 | } | |
498 | ||
fde2aec3 LC |
499 | /* List of supported engines. */ |
500 | static const struct engine engines[] = | |
501 | { | |
502 | { "default", exec_default }, | |
64562321 | 503 | { "performance", exec_performance }, |
fde2aec3 LC |
504 | { "userns", exec_in_user_namespace }, |
505 | #ifdef PROOT_PROGRAM | |
506 | { "proot", exec_with_proot }, | |
64562321 LC |
507 | #endif |
508 | #if HAVE_EXEC_WITH_LOADER | |
509 | { "fakechroot", exec_with_loader }, | |
fde2aec3 LC |
510 | #endif |
511 | { NULL, NULL } | |
512 | }; | |
513 | ||
514 | /* Return the "execution engine" to use. */ | |
515 | static const struct engine * | |
516 | execution_engine (void) | |
517 | { | |
518 | const char *str = getenv ("GUIX_EXECUTION_ENGINE"); | |
519 | ||
520 | if (str == NULL) | |
521 | str = "default"; | |
522 | ||
523 | try: | |
524 | for (const struct engine *engine = engines; | |
525 | engine->name != NULL; | |
526 | engine++) | |
527 | { | |
528 | if (strcmp (engine->name, str) == 0) | |
529 | return engine; | |
530 | } | |
531 | ||
532 | fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n", | |
533 | str); | |
534 | str = "default"; | |
535 | goto try; | |
536 | } | |
537 | ||
538 | \f | |
47a60325 LC |
539 | int |
540 | main (int argc, char *argv[]) | |
541 | { | |
542 | ssize_t size; | |
543 | char self[PATH_MAX]; | |
544 | size = readlink ("/proc/self/exe", self, sizeof self - 1); | |
545 | assert (size > 0); | |
546 | ||
547 | /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we | |
548 | want to extract "/home/ludo/.local/gnu/store". */ | |
549 | size_t index = strlen (self) | |
0cd13e72 | 550 | - strlen ("@WRAPPED_PROGRAM@") + strlen (original_store); |
47a60325 LC |
551 | char *store = strdup (self); |
552 | store[index] = '\0'; | |
553 | ||
554 | struct stat statbuf; | |
555 | ||
556 | /* If STORE is already at the "right" place, we can execute | |
557 | @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's | |
558 | needed when running one of these wrappers from within an unshare'd | |
559 | namespace, because 'unshare' fails with EPERM in that context. */ | |
0cd13e72 | 560 | if (strcmp (store, original_store) != 0 |
47a60325 LC |
561 | && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0) |
562 | { | |
fde2aec3 LC |
563 | const struct engine *engine = execution_engine (); |
564 | engine->exec (store, argc, argv); | |
47a60325 | 565 | |
fde2aec3 LC |
566 | /* If we reach this point, that's because ENGINE failed to do the |
567 | job. */ | |
bdb9b4e8 | 568 | fprintf (stderr, "\ |
30da3173 LC |
569 | This may be because \"user namespaces\" are not supported on this system.\n\ |
570 | Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\ | |
571 | unless you move it to the '@STORE_DIRECTORY@' directory.\n\ | |
572 | \n\ | |
573 | Please refer to the 'guix pack' documentation for more information.\n"); | |
bdb9b4e8 | 574 | return EXIT_FAILURE; |
47a60325 LC |
575 | } |
576 | ||
577 | /* The executable is available under @STORE_DIRECTORY@, so we can now | |
578 | execute it. */ | |
579 | int err = execv ("@WRAPPED_PROGRAM@", argv); | |
580 | if (err < 0) | |
581 | assert_perror (errno); | |
582 | ||
583 | return EXIT_FAILURE; | |
584 | } |