585 lines
15 KiB
C
585 lines
15 KiB
C
/* GNU Guix --- Functional package management for GNU
   Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>

   This file is part of GNU Guix.

   GNU Guix is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 3 of the License, or (at
   your option) any later version.

   GNU Guix is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with GNU Guix.  If not, see <http://www.gnu.org/licenses/>.  */
|
||
|
||
/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
   mount namespace where the store is mounted in its right place.

   We would happily do that in Scheme using 'call-with-container'.  However,
   this very program needs to be relocatable, so it needs to be statically
   linked, which complicates things (Guile's modules can hardly be "linked"
   into a single executable.)  */
|
||
|
||
#define _GNU_SOURCE
|
||
#include <stdlib.h>
|
||
#include <stdio.h>
|
||
#include <unistd.h>
|
||
#include <sched.h>
|
||
#include <sys/mount.h>
|
||
#include <errno.h>
|
||
#include <libgen.h>
|
||
#include <limits.h>
|
||
#include <string.h>
|
||
#include <assert.h>
|
||
#include <sys/stat.h>
|
||
#include <sys/types.h>
|
||
#include <sys/wait.h>
|
||
#include <fcntl.h>
|
||
#include <dirent.h>
|
||
#include <sys/syscall.h>
|
||
|
||
/* Whether we're building the ld.so/libfakechroot wrapper: 1 when
   PROGRAM_INTERPRETER, LOADER_AUDIT_MODULE, and FAKECHROOT_LIBRARY are all
   defined, 0 otherwise.

   The 'defined' operators are evaluated right here rather than being left
   in the macro's replacement list: a 'defined' that results from macro
   expansion inside '#if' has undefined behavior (C11 6.10.1p4; GCC warns
   with '-Wexpansion-to-defined').  As a bonus the macro is now a plain
   integer constant, usable in '#if' and in ordinary expressions alike.  */
#if (defined PROGRAM_INTERPRETER) && (defined LOADER_AUDIT_MODULE)	\
  && (defined FAKECHROOT_LIBRARY)
# define HAVE_EXEC_WITH_LOADER 1
#else
# define HAVE_EXEC_WITH_LOADER 0
#endif
|
||
|
||
/* Name of the store directory the wrapped program expects ("/gnu/store" by
   default).  The '@STORE_DIRECTORY@' token is presumably substituted when
   this template is instantiated -- confirm against the build recipe.  Note
   that 'sizeof original_store' counts the terminating NUL, which some
   callers rely on to skip the slash that follows the store name.  */
static const char original_store[] = "@STORE_DIRECTORY@";
|
||
|
||
|
||
/* Allocate SIZE bytes with 'malloc' and return the new block.  Allocation
   failure is not reported to the caller: it trips an assertion instead.  */
static void *
xmalloc (size_t size)
{
  void *block = malloc (size);

  assert (NULL != block);

  return block;
}
|
||
|
||
/* Return a newly-allocated string made of DIRECTORY, a slash, and FILE.
   Ownership of the result passes to the caller, who must 'free' it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* Two extra bytes: one for the slash, one for the terminating NUL.  */
  char *joined = xmalloc (directory_length + 2 + file_length);

  memcpy (joined, directory, directory_length);
  joined[directory_length] = '/';

  /* Copy FILE along with its terminating NUL.  */
  memcpy (joined + directory_length + 1, file, file_length + 1);

  return joined;
}
|
||
|
||
/* Create DIRECTORY along with any missing parent directories, in the spirit
   of 'mkdir -p'.  New directories get mode 0700; directories that already
   exist are left alone.  Any other 'mkdir' failure trips 'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* "/" needs no creation.  For a relative (or empty) name, 'dirname'
     eventually yields ".", which is its own parent: stop there as well,
     otherwise the recursion below would never terminate.  */
  if (strcmp (directory, "/") == 0
      || strcmp (directory, ".") == 0
      || directory[0] == '\0')
    return;

  /* 'dirname' may modify its argument, so give it a private copy.  The
     result may point into that copy: use it before freeing.  */
  char *copy = strdup (directory);
  assert (copy != NULL);
  mkdir_p (dirname (copy));
  free (copy);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
|
||
|
||
/* Recursively delete DIRECTORY and everything beneath it, like 'rm -rf'.
   A DIRECTORY that does not exist (anymore) is not an error; any other
   failure trips 'assert_perror'.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);
  if (stream == NULL)
    {
      /* Never hand a null stream to 'readdir'.  A missing directory is
	 tolerated, consistently with the 'rmdir' check at the end.  */
      if (errno != ENOENT)
	assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      if (strcmp (entry->d_name, ".") == 0
	  || strcmp (entry->d_name, "..") == 0)
	continue;

      char *full = concat (directory, entry->d_name);

      /* Try 'unlink' first; EISDIR tells us FULL is a directory.  */
      int err = unlink (full);
      if (err < 0)
	{
	  if (errno == EISDIR)
	    /* Recurse (we expect a shallow directory structure so there's
	       little risk of stack overflow.)  */
	    rm_rf (full);
	  else
	    assert_perror (errno);
	}

      free (full);
    }

  closedir (stream);

  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
|
||
|
||
/* Make TARGET a bind-mount of SOURCE. Take into account ENTRY's type, which
|
||
corresponds to SOURCE. */
|
||
static int
|
||
bind_mount (const char *source, const struct dirent *entry,
|
||
const char *target)
|
||
{
|
||
if (entry->d_type == DT_DIR)
|
||
{
|
||
int err = mkdir (target, 0700);
|
||
if (err != 0)
|
||
return err;
|
||
}
|
||
else
|
||
close (open (target, O_WRONLY | O_CREAT));
|
||
|
||
return mount (source, target, "none",
|
||
MS_BIND | MS_REC | MS_RDONLY, NULL);
|
||
}
|
||
|
||
#if HAVE_EXEC_WITH_LOADER

/* Create TARGET as a symbolic link pointing to SOURCE and return the result
   of symlink(2).  ENTRY is unused; the parameter is there so that this
   function has the signature 'mirror_directory' expects of its FIRMLINK
   argument.  */
static int
make_symlink (const char *source, const struct dirent *entry,
	      const char *target)
{
  int status = symlink (source, target);

  return status;
}

#endif
|
||
|
||
/* Mirror with FIRMLINK all the top-level entries in SOURCE to TARGET. */
|
||
static void
|
||
mirror_directory (const char *source, const char *target,
|
||
int (* firmlink) (const char *, const struct dirent *,
|
||
const char *))
|
||
{
|
||
DIR *stream = opendir (source);
|
||
|
||
for (struct dirent *entry = readdir (stream);
|
||
entry != NULL;
|
||
entry = readdir (stream))
|
||
{
|
||
/* XXX: Some file systems may not report a useful 'd_type'. Ignore them
|
||
for now. */
|
||
assert (entry->d_type != DT_UNKNOWN);
|
||
|
||
if (strcmp (entry->d_name, ".") == 0
|
||
|| strcmp (entry->d_name, "..") == 0)
|
||
continue;
|
||
|
||
char *abs_source = concat (source, entry->d_name);
|
||
char *new_entry = concat (target, entry->d_name);
|
||
|
||
if (entry->d_type == DT_LNK)
|
||
{
|
||
char target[PATH_MAX];
|
||
|
||
ssize_t result = readlink (abs_source, target, sizeof target - 1);
|
||
if (result > 0)
|
||
{
|
||
target[result] = '\0';
|
||
int err = symlink (target, new_entry);
|
||
if (err < 0)
|
||
assert_perror (errno);
|
||
}
|
||
}
|
||
else
|
||
{
|
||
/* Create the mount point. */
|
||
int err = firmlink (abs_source, entry, new_entry);
|
||
|
||
/* It used to be that only directories could be bind-mounted. Thus,
|
||
keep going if we fail to bind-mount a non-directory entry.
|
||
That's OK because regular files in the root file system are
|
||
usually uninteresting. */
|
||
if (err != 0 && entry->d_type != DT_DIR)
|
||
assert_perror (errno);
|
||
|
||
free (new_entry);
|
||
free (abs_source);
|
||
}
|
||
}
|
||
|
||
closedir (stream);
|
||
}
|
||
|
||
/* Write a user or group ID map for process PID: FILE names the map under
   /proc/PID ("uid_map" or "gid_map" in this file's callers) and the single
   line written maps ID to itself, for a range of one ID.  See
   user_namespaces(7).  Failure to open or write the map trips
   'assert_perror'.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  /* The one-line map: "ID ID 1".  */
  char mapping[100];
  int mapping_length = snprintf (mapping, sizeof mapping,
				 "%d %d 1\n", id, id);

  char path[100];
  snprintf (path, sizeof path, "/proc/%d/%s", pid, file);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  if (write (fd, mapping, mapping_length) < 0)
    assert_perror (errno);

  close (fd);
}
|
||
|
||
/* Forbid setgroups(2) in process PID by writing "deny" to
   /proc/PID/setgroups.  Failure to open or write that file trips
   'assert_perror'.  */
static void
disallow_setgroups (pid_t pid)
{
  static const char verdict[] = "deny";
  char path[100];

  snprintf (path, sizeof path, "/proc/%d/setgroups", pid);

  int fd = open (path, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  /* 'sizeof verdict' is 5: the terminating NUL is written too.  */
  ssize_t written = write (fd, verdict, sizeof verdict);
  if (written < 0)
    assert_perror (errno);

  close (fd);
}
|
||
|
||
/* Run the wrapper program in a separate mount user namespace. Return only
|
||
upon failure. */
|
||
static void
|
||
exec_in_user_namespace (const char *store, int argc, char *argv[])
|
||
{
|
||
/* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
|
||
bind-mounted in the right place. */
|
||
int err;
|
||
char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
|
||
char *new_store = concat (new_root, original_store);
|
||
char *cwd = get_current_dir_name ();
|
||
|
||
/* Create a child with separate namespaces and set up bind-mounts from
|
||
there. That way, bind-mounts automatically disappear when the child
|
||
exits, which simplifies cleanup for the parent. Note: clone is more
|
||
convenient than fork + unshare since the parent can directly write
|
||
the child uid_map/gid_map files. */
|
||
pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
|
||
NULL, NULL, NULL);
|
||
switch (child)
|
||
{
|
||
case 0:
|
||
/* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
|
||
we cannot make NEW_ROOT a tmpfs (which would have saved the need
|
||
for 'rm_rf'.) */
|
||
mirror_directory ("/", new_root, bind_mount);
|
||
mkdir_p (new_store);
|
||
err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
|
||
NULL);
|
||
if (err < 0)
|
||
assert_perror (errno);
|
||
|
||
chdir (new_root);
|
||
err = chroot (new_root);
|
||
if (err < 0)
|
||
assert_perror (errno);
|
||
|
||
/* Change back to where we were before chroot'ing. */
|
||
chdir (cwd);
|
||
|
||
int err = execv ("@WRAPPED_PROGRAM@", argv);
|
||
if (err < 0)
|
||
assert_perror (errno);
|
||
break;
|
||
|
||
case -1:
|
||
/* Failure: user namespaces not supported. */
|
||
fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);
|
||
rm_rf (new_root);
|
||
break;
|
||
|
||
default:
|
||
{
|
||
/* Map the current user/group ID in the child's namespace (the
|
||
default is to get the "overflow UID", i.e., the UID of
|
||
"nobody"). We must first disallow 'setgroups' for that
|
||
process. */
|
||
disallow_setgroups (child);
|
||
write_id_map (child, "uid_map", getuid ());
|
||
write_id_map (child, "gid_map", getgid ());
|
||
|
||
int status;
|
||
waitpid (child, &status, 0);
|
||
chdir ("/"); /* avoid EBUSY */
|
||
rm_rf (new_root);
|
||
free (new_root);
|
||
|
||
if (WIFEXITED (status))
|
||
exit (WEXITSTATUS (status));
|
||
else
|
||
/* Abnormal termination cannot really be reproduced, so exit
|
||
with 255. */
|
||
exit (255);
|
||
}
|
||
}
|
||
}
|
||
|
||
|
||
#ifdef PROOT_PROGRAM

/* Replace the current process with PRoot running the wrapped program.
   PRoot is looked up under STORE at PROOT_PROGRAM and is told, via '-b', to
   make STORE appear at the original store location.  ARGV[1] onwards are
   forwarded to the wrapped program.  Failure to 'execv' trips
   'assert_perror'.  */
static void
exec_with_proot (const char *store, int argc, char *argv[])
{
  /* Leading arguments: PRoot itself, "-b", the bind specification, and
     the wrapped program.  */
  int proot_specific_argc = 4;
  int proot_argc = argc + proot_specific_argc;
  char *proot_argv[proot_argc + 1];

  /* "STORE:ORIGINAL_STORE"; 'sizeof original_store' accounts for the
     terminating NUL.  */
  char bind_spec[strlen (store) + 1 + sizeof original_store];
  snprintf (bind_spec, sizeof bind_spec, "%s:%s", store, original_store);

  char *proot = concat (store, PROOT_PROGRAM);

  proot_argv[0] = proot;
  proot_argv[1] = "-b";
  proot_argv[2] = bind_spec;
  proot_argv[3] = "@WRAPPED_PROGRAM@";

  /* Forward ARGV[1] .. ARGV[ARGC]; the latter is ARGV's own terminator.  */
  for (int from = 1; from <= argc; from++)
    proot_argv[from + proot_specific_argc - 1] = argv[from];

  proot_argv[proot_argc] = NULL;

  /* Seccomp support seems to invariably lead to segfaults; disable it by
     default.  The last argument is zero so that a value already set by the
     user is preserved.  */
  setenv ("PROOT_NO_SECCOMP", "1", 0);

  if (execv (proot, proot_argv) < 0)
    assert_perror (errno);
}

#endif
|
||
|
||
|
||
#if HAVE_EXEC_WITH_LOADER

/* Run the wrapped program by invoking the dynamic loader (ld.so) found
   under STORE, passing it the audit module via '--audit' and libfakechroot
   via '--preload'.  ARGV[1] onwards are forwarded to the wrapped program.

   A temporary root is built out of symlinks to the top-level entries of
   "/", plus a symlink from the original store location to STORE; it is
   advertised through the FAKECHROOT_BASE environment variable.  The loader
   runs in a child process; the parent waits for it, deletes the temporary
   root, and exits with the child's status (255 on abnormal termination).  */
static void
exec_with_loader (const char *store, int argc, char *argv[])
{
  /* File names recorded at build time are assumed to start with the
     original store name followed by a slash.  'sizeof original_store'
     counts the terminating NUL, so skipping that many bytes skips the
     slash as well and leaves the store-relative part.  */
  const size_t skip = sizeof original_store;

  char *loader = concat (store, PROGRAM_INTERPRETER + skip);

  /* Leading arguments: ARGV[0], "--audit" MODULE, "--preload" LIBRARY, and
     the wrapped program.  */
  size_t loader_specific_argc = 6;
  size_t loader_argc = argc + loader_specific_argc;
  char *loader_argv[loader_argc + 1];

  loader_argv[0] = argv[0];
  loader_argv[1] = "--audit";
  loader_argv[2] = concat (store, LOADER_AUDIT_MODULE + skip);
  loader_argv[3] = "--preload";
  loader_argv[4] = concat (store, FAKECHROOT_LIBRARY + skip);
  loader_argv[5] = concat (store, "@WRAPPED_PROGRAM@" + skip);

  /* Forward ARGV[1] .. ARGV[ARGC]; the latter is ARGV's own terminator.  */
  for (size_t n = 0; n < (size_t) argc; n++)
    loader_argv[loader_specific_argc + n] = argv[n + 1];

  loader_argv[loader_argc] = NULL;

  /* Set up the root directory.  */
  int err;
  char *new_root = mkdtemp (strdup ("/tmp/guix-exec-XXXXXX"));
  mirror_directory ("/", new_root, make_symlink);

  /* Make the original store name, under NEW_ROOT, point to STORE.  */
  char *new_store = concat (new_root, original_store);
  char *new_store_parent = dirname (strdup (new_store));
  mkdir_p (new_store_parent);
  symlink (store, new_store);

#ifdef GCONV_DIRECTORY
  /* Tell libc where to find its gconv modules.  This is necessary because
     gconv uses non-interposable 'open' calls.  */
  char *gconv_path = concat (store, GCONV_DIRECTORY + skip);
  setenv ("GCONV_PATH", gconv_path, 1);
  free (gconv_path);
#endif

  setenv ("FAKECHROOT_BASE", new_root, 1);

  pid_t child = fork ();
  if (child == 0)
    {
      /* Child: become the loader.  */
      err = execv (loader, loader_argv);
      if (err < 0)
	assert_perror (errno);
      exit (EXIT_FAILURE);
    }
  else if (child == -1)
    {
      assert_perror (errno);
      exit (EXIT_FAILURE);
    }
  else
    {
      /* Parent: wait for the program to finish, then clean up.  */
      int status;
      waitpid (child, &status, 0);
      chdir ("/");			/* avoid EBUSY */
      rm_rf (new_root);
      free (new_root);

      close (2);			/* flushing stderr should be silent */

      /* Abnormal termination cannot really be reproduced, so exit with
	 255 in that case.  */
      exit (WIFEXITED (status) ? WEXITSTATUS (status) : 255);
    }
}

#endif
|
||
|
||
|
||
/* Execution engines.  An engine is a named strategy for running the wrapped
   program.  */

struct engine
{
  /* Name under which the engine is looked up by 'execution_engine'.  */
  const char *name;

  /* Entry point, called as EXEC (STORE, ARGC, ARGV).  */
  void (* exec) (const char *store, int argc, char **argv);
};
|
||
|
||
/* Switch stderr to full buffering, backed by a 4 KiB buffer with static
   storage duration, so that messages are held back until the stream is
   flushed.  */
static void
buffer_stderr (void)
{
  enum { buffer_size = 4096 };
  static char storage[buffer_size];

  setvbuf (stderr, storage, _IOFBF, buffer_size);
}
|
||
|
||
/* The "default" engine, which favors robustness: try user namespaces and,
   when PRoot support is compiled in, fall back to PRoot.  Each step returns
   only if it failed.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* Hold back whatever 'exec_in_user_namespace' prints upon failure, so
     that nothing is displayed if 'exec_with_proot' then works.  */
  buffer_stderr ();

  /* First choice.  */
  exec_in_user_namespace (store, argc, argv);

#ifdef PROOT_PROGRAM
  /* Still here: user namespaces did not work out.  */
  exec_with_proot (store, argc, argv);
#endif
}
|
||
|
||
/* The "performance" engine, which favors performance over robustness: try
   user namespaces and, when the ld.so/libfakechroot wrapper is compiled in,
   fall back to it.  Each step returns only if it failed.  */
static void
exec_performance (const char *store, int argc, char *argv[])
{
  /* Hold back error messages from the first attempt.  */
  buffer_stderr ();

  /* First choice.  */
  exec_in_user_namespace (store, argc, argv);

#if HAVE_EXEC_WITH_LOADER
  /* Still here: user namespaces did not work out.  */
  exec_with_loader (store, argc, argv);
#endif
}
|
||
|
||
/* List of supported engines. */
|
||
static const struct engine engines[] =
|
||
{
|
||
{ "default", exec_default },
|
||
{ "performance", exec_performance },
|
||
{ "userns", exec_in_user_namespace },
|
||
#ifdef PROOT_PROGRAM
|
||
{ "proot", exec_with_proot },
|
||
#endif
|
||
#if HAVE_EXEC_WITH_LOADER
|
||
{ "fakechroot", exec_with_loader },
|
||
#endif
|
||
{ NULL, NULL }
|
||
};
|
||
|
||
/* Return the "execution engine" to use. */
|
||
static const struct engine *
|
||
execution_engine (void)
|
||
{
|
||
const char *str = getenv ("GUIX_EXECUTION_ENGINE");
|
||
|
||
if (str == NULL)
|
||
str = "default";
|
||
|
||
try:
|
||
for (const struct engine *engine = engines;
|
||
engine->name != NULL;
|
||
engine++)
|
||
{
|
||
if (strcmp (engine->name, str) == 0)
|
||
return engine;
|
||
}
|
||
|
||
fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n",
|
||
str);
|
||
str = "default";
|
||
goto try;
|
||
}
|
||
|
||
|
||
int
|
||
main (int argc, char *argv[])
|
||
{
|
||
ssize_t size;
|
||
char self[PATH_MAX];
|
||
size = readlink ("/proc/self/exe", self, sizeof self - 1);
|
||
assert (size > 0);
|
||
|
||
/* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
|
||
want to extract "/home/ludo/.local/gnu/store". */
|
||
size_t index = strlen (self)
|
||
- strlen ("@WRAPPED_PROGRAM@") + strlen (original_store);
|
||
char *store = strdup (self);
|
||
store[index] = '\0';
|
||
|
||
struct stat statbuf;
|
||
|
||
/* If STORE is already at the "right" place, we can execute
|
||
@WRAPPED_PROGRAM@ right away. This is not just an optimization: it's
|
||
needed when running one of these wrappers from within an unshare'd
|
||
namespace, because 'unshare' fails with EPERM in that context. */
|
||
if (strcmp (store, original_store) != 0
|
||
&& lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0)
|
||
{
|
||
const struct engine *engine = execution_engine ();
|
||
engine->exec (store, argc, argv);
|
||
|
||
/* If we reach this point, that's because ENGINE failed to do the
|
||
job. */
|
||
fprintf (stderr, "\
|
||
This may be because \"user namespaces\" are not supported on this system.\n\
|
||
Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
|
||
unless you move it to the '@STORE_DIRECTORY@' directory.\n\
|
||
\n\
|
||
Please refer to the 'guix pack' documentation for more information.\n");
|
||
return EXIT_FAILURE;
|
||
}
|
||
|
||
/* The executable is available under @STORE_DIRECTORY@, so we can now
|
||
execute it. */
|
||
int err = execv ("@WRAPPED_PROGRAM@", argv);
|
||
if (err < 0)
|
||
assert_perror (errno);
|
||
|
||
return EXIT_FAILURE;
|
||
}
|