Commit | Line | Data |
---|---|---|
47a60325 | 1 | /* GNU Guix --- Functional package management for GNU |
2520059b | 2 | Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org> |
47a60325 LC |
3 | |
4 | This file is part of GNU Guix. | |
5 | ||
6 | GNU Guix is free software; you can redistribute it and/or modify it | |
7 | under the terms of the GNU General Public License as published by | |
8 | the Free Software Foundation; either version 3 of the License, or (at | |
9 | your option) any later version. | |
10 | ||
11 | GNU Guix is distributed in the hope that it will be useful, but | |
12 | WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
14 | GNU General Public License for more details. | |
15 | ||
16 | You should have received a copy of the GNU General Public License | |
17 | along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */ | |
18 | ||
19 | /* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate | |
20 | mount namespace where the store is mounted in its right place. | |
21 | ||
22 | We would happily do that in Scheme using 'call-with-container'. However, | |
23 | this very program needs to be relocatable, so it needs to be statically | |
24 | linked, which complicates things (Guile's modules can hardly be "linked" | |
25 | into a single executable.) */ | |
26 | ||
27 | #define _GNU_SOURCE | |
28 | #include <stdlib.h> | |
29 | #include <stdio.h> | |
30 | #include <unistd.h> | |
31 | #include <sched.h> | |
32 | #include <sys/mount.h> | |
33 | #include <errno.h> | |
34 | #include <libgen.h> | |
35 | #include <limits.h> | |
36 | #include <string.h> | |
37 | #include <assert.h> | |
38 | #include <sys/stat.h> | |
39 | #include <sys/types.h> | |
40 | #include <sys/wait.h> | |
41 | #include <fcntl.h> | |
42 | #include <dirent.h> | |
30da3173 | 43 | #include <sys/syscall.h> |
47a60325 | 44 | |
14928af2 LC |
/* Like 'malloc', but abort if 'malloc' returns NULL.  The check is an
   explicit test rather than an 'assert' so that it remains in effect when
   compiling with NDEBUG; callers can thus rely on a non-NULL result.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size);

  if (result == NULL)
    {
      fprintf (stderr, "xmalloc: failed to allocate %zu bytes\n", size);
      abort ();
    }

  return result;
}
53 | ||
47a60325 LC |
/* Return a newly allocated string consisting of DIRECTORY, a slash, and
   FILE.  The caller owns the result and must eventually 'free' it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* Two extra bytes: one for the slash, one for the terminating NUL.  */
  char *joined = xmalloc (directory_length + 2 + file_length);

  memcpy (joined, directory, directory_length);
  joined[directory_length] = '/';

  /* Copy FILE along with its terminating NUL.  */
  memcpy (joined + directory_length + 1, file, file_length + 1);

  return joined;
}
66 | ||
/* Create DIRECTORY and any missing parent directories, like 'mkdir -p'.
   Directories are created with mode 0700.  A directory that already exists
   is not an error; any other 'mkdir' failure is fatal.  */
static void
mkdir_p (const char *directory)
{
  /* "/" ends the recursion for absolute file names.  "." ends it for
     relative ones: 'dirname' keeps returning "." for those, so without this
     test the recursion would never terminate.  Neither needs creating.  */
  if (strcmp (directory, "/") == 0 || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
79 | ||
/* Recursively delete DIRECTORY and everything below it, like 'rm -rf'.  A
   DIRECTORY that does not exist is not an error; any other failure is
   fatal.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* Nothing to delete when DIRECTORY is already gone, which mirrors the
         ENOENT tolerance of the final 'rmdir' below.  In any case we must
         not hand a NULL stream to 'readdir'.  */
      if (errno != ENOENT)
        assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *full = concat (directory, entry->d_name);

      int err = unlink (full);
      if (err < 0)
        {
          if (errno == EISDIR)
            /* FULL is a directory.  Recurse (we expect a shallow directory
               structure so there's little risk of stack overflow.)  */
            rm_rf (full);
          else
            assert_perror (errno);
        }

      free (full);
    }

  closedir (stream);

  /* DIRECTORY is empty by now; remove it.  */
  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
115 | ||
/* Bind mount all the top-level entries in SOURCE to TARGET.  Symbolic links
   are recreated under TARGET instead of being mounted.  For every other
   entry, a mount point of the same name is created under TARGET and the
   entry is mounted onto it with MS_BIND | MS_REC | MS_RDONLY.  */
static void
bind_mount (const char *source, const char *target)
{
  DIR *stream = opendir (source);
  if (stream == NULL)
    {
      /* 'readdir' must not be called on a NULL stream.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Named so as not to shadow the TARGET parameter.  */
          char link_target[PATH_MAX];

          ssize_t result = readlink (abs_source, link_target,
                                     sizeof link_target - 1);
          if (result > 0)
            {
              /* 'readlink' does not NUL-terminate its result.  */
              link_target[result] = '\0';
              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          if (entry->d_type == DT_DIR)
            {
              int err = mkdir (new_entry, 0700);
              if (err != 0)
                assert_perror (errno);
            }
          else
            {
              /* 'open' requires an explicit mode when O_CREAT is given;
                 omitting it passes an indeterminate value.  If creation
                 fails, the 'mount' call below fails too and is handled
                 there.  */
              int fd = open (new_entry, O_WRONLY | O_CREAT, 0600);
              if (fd >= 0)
                close (fd);
            }

          int err = mount (abs_source, new_entry, "none",
                           MS_BIND | MS_REC | MS_RDONLY, NULL);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.

             NOTE(review): the condition below does the opposite of what the
             paragraph above describes: it aborts when mounting a
             NON-directory fails and silently tolerates failures for
             directories.  It is kept as is to preserve behavior; confirm
             which of the two is intended.  */
          if (err != 0 && entry->d_type != DT_DIR)
            assert_perror (errno);
        }

      /* Release the names on every path, including the symlink one.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
179 | ||
30da3173 LC |
/* Write to /proc/PID/FILE a single-entry ID map that maps ID to itself.
   FILE is the name of the map file, e.g., "uid_map" or "gid_map".  See
   user_namespaces(7).  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char path[100];
  char mapping[100];

  snprintf (path, sizeof path, "/proc/%d/%s", pid, file);

  /* One line: ID inside the namespace, ID outside of it, and a range of
     length 1.  */
  int length = snprintf (mapping, sizeof mapping, "%d %d 1\n", id, id);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  if (write (fd, mapping, length) < 0)
    assert_perror (errno);

  close (fd);
}
202 | ||
/* Disallow setgroups(2) for PID by writing "deny" to
   /proc/PID/setgroups.  */
static void
disallow_setgroups (pid_t pid)
{
  /* 'sizeof verdict' is 5: the terminating NUL is written as well.  */
  static const char verdict[] = "deny";
  char path[100];

  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  if (write (fd, verdict, sizeof verdict) < 0)
    assert_perror (errno);

  close (fd);
}
221 | ||
bdb9b4e8 LC |
/* Run the wrapped program in separate mount and user namespaces where STORE
   is bind-mounted at @STORE_DIRECTORY@.  ARGV is passed unchanged to the
   wrapped program; ARGC is unused.  When the child could be created, this
   function does not return: the parent exits with the child's exit status,
   or 255 if the child terminated abnormally.  Return only upon failure.  */
static void
exec_in_user_namespace (const char *store, int argc, char *argv[])
{
  /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
     bind-mounted in the right place.  */
  int err;

  /* Both 'strdup' and 'mkdtemp' can fail; bail out rather than passing NULL
     on to 'concat'.  */
  char *template = strdup ("/tmp/guix-exec-XXXXXX");
  char *new_root = template != NULL ? mkdtemp (template) : NULL;
  if (new_root == NULL)
    {
      fprintf (stderr, "%s: error: cannot create temporary directory: %m\n",
               argv[0]);
      free (template);
      return;
    }

  char *new_store = concat (new_root, "@STORE_DIRECTORY@");
  char *cwd = get_current_dir_name ();

  /* Create a child with separate namespaces and set up bind-mounts from
     there.  That way, bind-mounts automatically disappear when the child
     exits, which simplifies cleanup for the parent.  Note: clone is more
     convenient than fork + unshare since the parent can directly write
     the child uid_map/gid_map files.  */
  pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
                         NULL, NULL, NULL);
  switch (child)
    {
    case 0:
      /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
         we cannot make NEW_ROOT a tmpfs (which would have saved the need
         for 'rm_rf'.)  */
      bind_mount ("/", new_root);
      mkdir_p (new_store);
      err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
                   NULL);
      if (err < 0)
        assert_perror (errno);

      chdir (new_root);
      err = chroot (new_root);
      if (err < 0)
        assert_perror (errno);

      /* Change back to where we were before chroot'ing.  */
      chdir (cwd);

      err = execv ("@WRAPPED_PROGRAM@", argv);
      if (err < 0)
        assert_perror (errno);
      break;

    case -1:
      /* Failure: user namespaces not supported.  */
      fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);
      rm_rf (new_root);
      break;

    default:
      {
        /* Map the current user/group ID in the child's namespace (the
           default is to get the "overflow UID", i.e., the UID of
           "nobody").  We must first disallow 'setgroups' for that
           process.  */
        disallow_setgroups (child);
        write_id_map (child, "uid_map", getuid ());
        write_id_map (child, "gid_map", getgid ());

        int status;
        waitpid (child, &status, 0);
        chdir ("/");                    /* avoid EBUSY */
        rm_rf (new_root);
        free (new_root);

        if (WIFEXITED (status))
          exit (WEXITSTATUS (status));
        else
          /* Abnormal termination cannot really be reproduced, so exit
             with 255.  */
          exit (255);
      }
    }

  /* Reached only on failure; release what was allocated above before
     returning to the caller.  */
  free (cwd);
  free (new_store);
  free (new_root);
}
298 | ||
47a60325 | 299 | \f |
99aec37a LC |
#ifdef PROOT_PROGRAM

/* Replace the current process with PRoot running the wrapped program, asking
   PRoot (through its '-b' option) to make STORE appear at
   @STORE_DIRECTORY@.  ARGV[1] and the following arguments are forwarded to
   the wrapped program.  This function does not return when 'execv'
   succeeds.  */
static void
exec_with_proot (const char *store, int argc, char *argv[])
{
  /* Leading entries of the command line:
     PROOT -b STORE:@STORE_DIRECTORY@ @WRAPPED_PROGRAM@.  */
  enum { fixed_args = 4 };

  int total = argc + fixed_args;
  char *command[total + 1];
  char binding[strlen (store) + 1 + sizeof "@STORE_DIRECTORY@"];

  snprintf (binding, sizeof binding, "%s:%s", store, "@STORE_DIRECTORY@");

  char *proot = concat (store, PROOT_PROGRAM);

  command[0] = proot;
  command[1] = "-b";
  command[2] = binding;
  command[3] = "@WRAPPED_PROGRAM@";

  /* Copy ARGV[1] through ARGV[ARGC], the latter being ARGV's terminating
     NULL.  */
  memcpy (&command[fixed_args], &argv[1], argc * sizeof argv[0]);
  command[total] = NULL;

  /* Seccomp support seems to invariably lead to segfaults; disable it by
     default.  The last argument being 0, a value already present in the
     environment is preserved.  */
  setenv ("PROOT_NO_SECCOMP", "1", 0);

  if (execv (proot, command) < 0)
    assert_perror (errno);
}

#endif
338 | ||
339 | \f | |
fde2aec3 LC |
340 | /* Execution engines. */ |
341 | ||
/* An execution engine: a named strategy for running the wrapped program.  */
struct engine
{
  /* Name by which the engine is selected.  */
  const char *name;

  /* Procedure that runs the wrapped program.  It is called with the store
     file name followed by the argument count and argument vector.  */
  void (*exec) (const char *store, int argc, char **argv);
};
347 | ||
/* Make stderr fully buffered, using a 4096-byte static buffer.  */
static void
buffer_stderr (void)
{
  enum { capacity = 4096 };

  /* The buffer has static storage duration, so it remains valid after this
     function returns.  */
  static char storage[capacity];

  setvbuf (stderr, storage, _IOFBF, capacity);
}
354 | ||
/* The default engine: try 'exec_in_user_namespace' first and, when PRoot
   support was compiled in (PROOT_PROGRAM is defined), fall back to
   'exec_with_proot'.  STORE, ARGC, and ARGV are passed as is to each engine.
   Return only if every engine that was tried returned.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace'
     fails but 'exec_with_proot' works.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);
#ifdef PROOT_PROGRAM
  /* Reached only when 'exec_in_user_namespace' returned.  */
  exec_with_proot (store, argc, argv);
#endif
}
368 | ||
369 | /* List of supported engines. */ | |
370 | static const struct engine engines[] = | |
371 | { | |
372 | { "default", exec_default }, | |
373 | { "userns", exec_in_user_namespace }, | |
374 | #ifdef PROOT_PROGRAM | |
375 | { "proot", exec_with_proot }, | |
376 | #endif | |
377 | { NULL, NULL } | |
378 | }; | |
379 | ||
380 | /* Return the "execution engine" to use. */ | |
381 | static const struct engine * | |
382 | execution_engine (void) | |
383 | { | |
384 | const char *str = getenv ("GUIX_EXECUTION_ENGINE"); | |
385 | ||
386 | if (str == NULL) | |
387 | str = "default"; | |
388 | ||
389 | try: | |
390 | for (const struct engine *engine = engines; | |
391 | engine->name != NULL; | |
392 | engine++) | |
393 | { | |
394 | if (strcmp (engine->name, str) == 0) | |
395 | return engine; | |
396 | } | |
397 | ||
398 | fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n", | |
399 | str); | |
400 | str = "default"; | |
401 | goto try; | |
402 | } | |
403 | ||
404 | \f | |
47a60325 LC |
405 | int |
406 | main (int argc, char *argv[]) | |
407 | { | |
408 | ssize_t size; | |
409 | char self[PATH_MAX]; | |
410 | size = readlink ("/proc/self/exe", self, sizeof self - 1); | |
411 | assert (size > 0); | |
412 | ||
413 | /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we | |
414 | want to extract "/home/ludo/.local/gnu/store". */ | |
415 | size_t index = strlen (self) | |
416 | - strlen ("@WRAPPED_PROGRAM@") | |
417 | + strlen ("@STORE_DIRECTORY@"); | |
418 | char *store = strdup (self); | |
419 | store[index] = '\0'; | |
420 | ||
421 | struct stat statbuf; | |
422 | ||
423 | /* If STORE is already at the "right" place, we can execute | |
424 | @WRAPPED_PROGRAM@ right away. This is not just an optimization: it's | |
425 | needed when running one of these wrappers from within an unshare'd | |
426 | namespace, because 'unshare' fails with EPERM in that context. */ | |
427 | if (strcmp (store, "@STORE_DIRECTORY@") != 0 | |
428 | && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0) | |
429 | { | |
fde2aec3 LC |
430 | const struct engine *engine = execution_engine (); |
431 | engine->exec (store, argc, argv); | |
47a60325 | 432 | |
fde2aec3 LC |
433 | /* If we reach this point, that's because ENGINE failed to do the |
434 | job. */ | |
bdb9b4e8 | 435 | fprintf (stderr, "\ |
30da3173 LC |
436 | This may be because \"user namespaces\" are not supported on this system.\n\ |
437 | Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\ | |
438 | unless you move it to the '@STORE_DIRECTORY@' directory.\n\ | |
439 | \n\ | |
440 | Please refer to the 'guix pack' documentation for more information.\n"); | |
bdb9b4e8 | 441 | return EXIT_FAILURE; |
47a60325 LC |
442 | } |
443 | ||
444 | /* The executable is available under @STORE_DIRECTORY@, so we can now | |
445 | execute it. */ | |
446 | int err = execv ("@WRAPPED_PROGRAM@", argv); | |
447 | if (err < 0) | |
448 | assert_perror (errno); | |
449 | ||
450 | return EXIT_FAILURE; | |
451 | } |