me
/
guix
Archived
1
0
Fork 0
This repository has been archived on 2024-08-07. You can view files and clone it, but cannot push or open issues/pull-requests.
guix/gnu/packages/aux-files/run-in-namespace.c

694 lines
18 KiB
C
Raw Blame History

This file contains invisible Unicode characters!

This file contains invisible Unicode characters that may be processed differently from what appears below. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to reveal hidden characters.

/* GNU Guix --- Functional package management for GNU
Copyright (C) 2018, 2019, 2020 Ludovic Courtès <ludo@gnu.org>
This file is part of GNU Guix.
GNU Guix is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at
your option) any later version.
GNU Guix is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with GNU Guix. If not, see <http://www.gnu.org/licenses/>. */
/* Make the given @WRAPPED_PROGRAM@ relocatable by executing it in a separate
mount namespace where the store is mounted in its right place.
We would happily do that in Scheme using 'call-with-container'. However,
this very program needs to be relocatable, so it needs to be statically
linked, which complicates things (Guile's modules can hardly be "linked"
into a single executable.) */
#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sched.h>
#include <sys/mount.h>
#include <errno.h>
#include <libgen.h>
#include <limits.h>
#include <string.h>
#include <assert.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <fcntl.h>
#include <dirent.h>
#include <sys/syscall.h>
#include <sys/prctl.h>
/* Whether we're building the ld.so/libfakechroot wrapper.  The condition is
   evaluated right here, in an '#if', and the macro expands to a plain 1 or
   0: a 'defined' operator that results from macro expansion inside '#if' has
   undefined behavior in ISO C (GCC flags it with -Wexpansion-to-defined).  */
#if (defined PROGRAM_INTERPRETER) && (defined LOADER_AUDIT_MODULE) \
  && (defined FAKECHROOT_LIBRARY)
# define HAVE_EXEC_WITH_LOADER 1
#else
# define HAVE_EXEC_WITH_LOADER 0
#endif
/* The original store, "/gnu/store" by default. */
static const char original_store[] = "@STORE_DIRECTORY@";
/* Like 'malloc', but never return NULL: print a diagnostic and abort when
   memory is exhausted.  Contrary to an 'assert'-based check, this keeps
   working when compiled with NDEBUG.  A SIZE of zero is rounded up to one
   byte because 'malloc (0)' may legitimately return NULL.  The caller owns
   the result and releases it with 'free'.  */
static void *
xmalloc (size_t size)
{
  void *result = malloc (size != 0 ? size : 1);

  if (result == NULL)
    {
      fputs ("error: memory exhausted\n", stderr);
      abort ();
    }

  return result;
}
/* Return a newly allocated string made of DIRECTORY, a slash, and FILE, in
   that order.  The caller owns the result and must eventually free it.  */
static char *
concat (const char *directory, const char *file)
{
  size_t directory_length = strlen (directory);
  size_t file_length = strlen (file);

  /* One byte for the slash, one for the terminating NUL.  */
  char *joined = xmalloc (directory_length + 2 + file_length);

  memcpy (joined, directory, directory_length);
  joined[directory_length] = '/';
  /* Copy FILE together with its terminating NUL.  */
  memcpy (joined + directory_length + 1, file, file_length + 1);

  return joined;
}
/* Create DIRECTORY and any missing ancestor, like 'mkdir -p'.  Directories
   are created with mode 0700.  Directories that already exist are left
   alone; any other 'mkdir' failure is reported with 'assert_perror'.  */
static void
mkdir_p (const char *directory)
{
  /* Repeatedly taking the 'dirname' of a file name ends at "/" for absolute
     names and at "." for relative ones.  Stop on either: stopping only on
     "/" would recurse forever when DIRECTORY is relative.  */
  if (strcmp (directory, "/") == 0 || strcmp (directory, ".") == 0)
    return;

  /* 'dirname' may modify its argument, hence the stack-allocated copy.  */
  char *parent = dirname (strdupa (directory));
  mkdir_p (parent);

  int err = mkdir (directory, 0700);
  if (err < 0 && errno != EEXIST)
    assert_perror (errno);
}
/* Recursively delete DIRECTORY and everything below it, like 'rm -rf'.
   Unexpected failures are reported with 'assert_perror'; it is not an error
   for DIRECTORY not to exist.  */
static void
rm_rf (const char *directory)
{
  DIR *stream = opendir (directory);

  /* 'opendir' returns NULL on failure and 'readdir' must not be passed a
     NULL stream, so only walk the entries when the stream is valid.  When it
     is not, the 'rmdir' call below reports the problem, if any.  */
  if (stream != NULL)
    {
      for (struct dirent *entry = readdir (stream);
           entry != NULL;
           entry = readdir (stream))
        {
          if (strcmp (entry->d_name, ".") == 0
              || strcmp (entry->d_name, "..") == 0)
            continue;

          char *full = concat (directory, entry->d_name);

          if (unlink (full) < 0)
            {
              if (errno == EISDIR)
                /* Recurse (we expect a shallow directory structure so
                   there's little risk of stack overflow.)  */
                rm_rf (full);
              else
                assert_perror (errno);
            }

          free (full);
        }

      closedir (stream);
    }

  int err = rmdir (directory);
  if (err < 0 && errno != ENOENT)
    assert_perror (errno);
}
/* Make TARGET a bind-mount of SOURCE.  ENTRY describes SOURCE and determines
   which kind of mount point is created at TARGET: a directory when ENTRY is
   a directory, an empty regular file otherwise.  Return 0 on success and a
   nonzero value on failure, with 'errno' set by the failing call.  */
static int
bind_mount (const char *source, const struct dirent *entry,
            const char *target)
{
  if (entry->d_type == DT_DIR)
    {
      int err = mkdir (target, 0700);
      if (err != 0)
        return err;
    }
  else
    {
      /* The mount point merely needs to exist.  'open' requires a mode
         argument whenever O_CREAT is given; without it the file would be
         created with unpredictable permission bits.  If 'open' fails, the
         'mount' call below fails in turn and reports the error.  */
      int fd = open (target, O_WRONLY | O_CREAT, 0600);
      if (fd >= 0)
        close (fd);
    }

  return mount (source, target, "none",
                MS_BIND | MS_REC | MS_RDONLY, NULL);
}
#if HAVE_EXEC_WITH_LOADER
/* Callback for 'mirror_directory': create TARGET as a symbolic link that
   points to SOURCE.  ENTRY is accepted only so that the signature matches
   that of the other callback; it is not used.  Return the value returned by
   'symlink'.  */
static int
make_symlink (const char *source, const struct dirent *entry,
              const char *target)
{
  int status = symlink (source, target);

  return status;
}
#endif
/* Mirror with FIRMLINK all the top-level entries in SOURCE to TARGET.

   The "." and ".." entries are skipped.  Symbolic links found in SOURCE are
   recreated under TARGET with the same link target instead of going through
   FIRMLINK.  For every other entry, FIRMLINK is called with the entry's file
   name under SOURCE, its 'struct dirent', and the file name to create under
   TARGET; it must return 0 on success.  */
static void
mirror_directory (const char *source, const char *target,
                  int (* firmlink) (const char *, const struct dirent *,
                                    const char *))
{
  DIR *stream = opendir (source);

  if (stream == NULL)
    {
      /* 'readdir' must not be called with a NULL stream.  */
      assert_perror (errno);
      return;
    }

  for (struct dirent *entry = readdir (stream);
       entry != NULL;
       entry = readdir (stream))
    {
      /* XXX: Some file systems may not report a useful 'd_type'.  Ignore them
         for now.  */
      assert (entry->d_type != DT_UNKNOWN);

      if (strcmp (entry->d_name, ".") == 0
          || strcmp (entry->d_name, "..") == 0)
        continue;

      char *abs_source = concat (source, entry->d_name);
      char *new_entry = concat (target, entry->d_name);

      if (entry->d_type == DT_LNK)
        {
          /* Named so as not to shadow the TARGET parameter.  */
          char link_target[PATH_MAX];
          ssize_t result = readlink (abs_source, link_target,
                                     sizeof link_target - 1);

          /* Links that cannot be read are silently skipped.  */
          if (result > 0)
            {
              /* 'readlink' does not NUL-terminate its result.  */
              link_target[result] = '\0';

              int err = symlink (link_target, new_entry);
              if (err < 0)
                assert_perror (errno);
            }
        }
      else
        {
          /* Create the mount point.  */
          int err = firmlink (abs_source, entry, new_entry);

          /* It used to be that only directories could be bind-mounted.  Thus,
             keep going if we fail to bind-mount a non-directory entry.
             That's OK because regular files in the root file system are
             usually uninteresting.

             NOTE(review): as written, the condition below reports failures
             for NON-directory entries and tolerates failures for
             directories, which looks like the opposite of what the
             paragraph above describes.  Confirm the intended behavior
             before changing it.  */
          if (err != 0 && entry->d_type != DT_DIR)
            assert_perror (errno);
        }

      /* Release both names whichever branch was taken; freeing them in the
         non-symlink branch only would leak them for every symlink.  */
      free (new_entry);
      free (abs_source);
    }

  closedir (stream);
}
/* Write to /proc/PID/FILE a one-line ID map that maps the single ID to
   itself.  Callers pass "uid_map" or "gid_map" as FILE; see
   user_namespaces(7).  Failures to open or write the file are reported with
   'assert_perror'.  */
static void
write_id_map (pid_t pid, const char *file, int id)
{
  char map_file_name[100];
  char map_line[100];

  snprintf (map_file_name, sizeof map_file_name, "/proc/%d/%s", pid, file);

  /* ID is mapped to ID, for a range of length 1.  */
  int line_length = snprintf (map_line, sizeof map_line,
                              "%d %d 1\n", id, id);

  int fd = open (map_file_name, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  int written = write (fd, map_line, line_length);
  if (written < 0)
    assert_perror (errno);

  close (fd);
}
/* Disallow setgroups(2) for PID by writing "deny" to /proc/PID/setgroups.
   Failures to open or write that file are reported with
   'assert_perror'.  */
static void
disallow_setgroups (pid_t pid)
{
  static const char verdict[] = "deny";
  char setgroups_file[100];

  snprintf (setgroups_file, sizeof setgroups_file,
            "/proc/%d/setgroups", pid);

  int fd = open (setgroups_file, O_WRONLY);
  if (fd < 0)
    assert_perror (errno);

  /* 'sizeof' rather than 'strlen': the terminating NUL is written too, for
     a total of 5 bytes.  */
  int written = write (fd, verdict, sizeof verdict);
  if (written < 0)
    assert_perror (errno);

  close (fd);
}
/* Run the wrapped program in a separate mount and user namespace where STORE
   is bind-mounted at ORIGINAL_STORE.  ARGV is passed unchanged to the
   wrapped program; ARGC is not used.

   When the namespace could be created, this function does not return: the
   calling process waits for the wrapped program and exits with its exit
   status, or with 255 if it terminated abnormally.  Return only upon
   failure to set things up.  */
static void
exec_in_user_namespace (const char *store, int argc, char *argv[])
{
  /* Spawn @WRAPPED_PROGRAM@ in a separate namespace where STORE is
     bind-mounted in the right place.  */
  int err, is_tmpfs;

  /* Both 'strdup' and 'mkdtemp' may fail; NEW_ROOT must not be used
     then.  */
  char *template = strdup ("/tmp/guix-exec-XXXXXX");
  char *new_root = template != NULL ? mkdtemp (template) : NULL;
  if (new_root == NULL)
    {
      fprintf (stderr, "%s: error: cannot create temporary directory: %m\n",
               argv[0]);
      free (template);
      return;
    }

  char *new_store = concat (new_root, original_store);
  char *cwd = get_current_dir_name ();

  /* Become the new parent of grand-children when their parent dies.  */
  prctl (PR_SET_CHILD_SUBREAPER, 1);

  /* Optionally, make NEW_ROOT a tmpfs.  That way, if we have to leave it
     behind because there are sub-processes still running when this wrapper
     exits, it's OK.  */
  err = mount ("none", new_root, "tmpfs", 0, NULL);
  is_tmpfs = (err == 0);

  /* Create a child with separate namespaces and set up bind-mounts from
     there.  That way, bind-mounts automatically disappear when the child
     exits, which simplifies cleanup for the parent.  Note: clone is more
     convenient than fork + unshare since the parent can directly write
     the child uid_map/gid_map files.  */
  pid_t child = syscall (SYS_clone, SIGCHLD | CLONE_NEWNS | CLONE_NEWUSER,
                         NULL, NULL, NULL);
  switch (child)
    {
    case 0:
      /* Note: Due to <https://bugzilla.kernel.org/show_bug.cgi?id=183461>
         we cannot make NEW_ROOT a tmpfs (which would have saved the need
         for 'rm_rf'.)
         NOTE(review): the parent does attempt a tmpfs mount above;
         presumably this remark is about mounting one from within the new
         user namespace -- confirm.  */
      mirror_directory ("/", new_root, bind_mount);
      mkdir_p (new_store);
      err = mount (store, new_store, "none", MS_BIND | MS_REC | MS_RDONLY,
                   NULL);
      if (err < 0)
        assert_perror (errno);

      chdir (new_root);
      err = chroot (new_root);
      if (err < 0)
        assert_perror (errno);

      /* Change back to where we were before chroot'ing.  */
      chdir (cwd);

      /* Reuse ERR instead of declaring a second variable that would shadow
         it.  */
      err = execv ("@WRAPPED_PROGRAM@", argv);
      if (err < 0)
        assert_perror (errno);

      /* Only reached when 'execv' failed and assertions are compiled out.
         The child must not return into the caller, which would go on
         running the parent's fallback code in a second process.  */
      _exit (EXIT_FAILURE);

    case -1:
      /* Failure: user namespaces not supported.  */
      fprintf (stderr, "%s: error: 'clone' failed: %m\n", argv[0]);

      /* If the tmpfs mount above succeeded, it must be unmounted first;
         otherwise the final 'rmdir' in 'rm_rf' would fail with EBUSY.  */
      if (is_tmpfs)
        umount (new_root);
      rm_rf (new_root);

      free (new_root);
      free (new_store);
      free (cwd);
      break;

    default:
      {
        /* Map the current user/group ID in the child's namespace (the
           default is to get the "overflow UID", i.e., the UID of
           "nobody").  We must first disallow 'setgroups' for that
           process.  */
        disallow_setgroups (child);
        write_id_map (child, "uid_map", getuid ());
        write_id_map (child, "gid_map", getgid ());

        int status, status_other;
        waitpid (child, &status, 0);

        chdir ("/");                      /* avoid EBUSY */
        if (is_tmpfs)
          {
            /* NEW_ROOT lives on in child processes and we no longer need it
               to exist as an empty directory in the global namespace.  */
            umount (new_root);
            rmdir (new_root);
          }
        /* Check whether there are child processes left.  If there are none,
           we can remove NEW_ROOT just fine.  Conversely, if there are
           processes left (for example because this wrapper's child forked),
           we have to leave NEW_ROOT behind so that those processes can still
           access their root file system (XXX).  */
        else if (waitpid (-1 , &status_other, WNOHANG) == -1)
          rm_rf (new_root);

        free (new_root);

        if (WIFEXITED (status))
          exit (WEXITSTATUS (status));
        else
          /* Abnormal termination cannot really be reproduced, so exit
             with 255.  */
          exit (255);
      }
    }
}
#ifdef PROOT_PROGRAM
/* Run the wrapped program under PRoot, found at PROOT_PROGRAM below STORE,
   asking it through '-b' to "bind-mount" STORE at ORIGINAL_STORE.  The
   arguments that follow ARGV[0] are forwarded to the wrapped program.  A
   failure of 'execv' is reported with 'assert_perror'.  */
static void
exec_with_proot (const char *store, int argc, char *argv[])
{
  /* PRoot's own name, "-b", the bind specification and the wrapped program
     come before the user's arguments.  */
  enum { leading_count = 4 };

  int total_count = argc + leading_count;
  char *proot_argv[total_count + 1];

  /* "STORE:ORIGINAL_STORE"; 'sizeof' accounts for the terminating NUL.  */
  char bind_spec[strlen (store) + 1 + sizeof original_store];
  snprintf (bind_spec, sizeof bind_spec, "%s:%s", store, original_store);

  char *proot = concat (store, PROOT_PROGRAM);

  proot_argv[0] = proot;
  proot_argv[1] = "-b";
  proot_argv[2] = bind_spec;
  proot_argv[3] = "@WRAPPED_PROGRAM@";

  /* Copy ARGV[1] up to and including ARGV[ARGC], the latter being NULL.  */
  for (int k = 1; k <= argc; k++)
    proot_argv[k + leading_count - 1] = argv[k];

  proot_argv[total_count] = NULL;

  /* Seccomp support seems to invariably lead to segfaults; disable it by
     default.  The last argument being 0, a value already present in the
     environment is left untouched.  */
  setenv ("PROOT_NO_SECCOMP", "1", 0);

  if (execv (proot, proot_argv) < 0)
    assert_perror (errno);
}
#endif
#if HAVE_EXEC_WITH_LOADER
/* Traverse PATH, a NULL-terminated string array, and return a colon-separated
   search path where each item of PATH has been relocated to STORE: items
   that start with ORIGINAL_STORE get that prefix replaced by STORE, other
   items are copied unchanged.  The result is malloc'd and must be freed by
   the caller; it is the empty string when PATH has no items.  */
static char *
relocated_search_path (const char *path[], const char *store)
{
  size_t store_length = strlen (store);
  size_t prefix_length = sizeof original_store - 1;

  /* Upper bound: every item relocated, plus one separator per item.  */
  size_t size = 0;
  for (size_t i = 0; path[i] != NULL; i++)
    size += store_length + strlen (path[i]) + 1;

  char *new_path = xmalloc (size + 1);

  /* Append at a cursor rather than with 'strcat', which would rescan the
     result for each item.  */
  char *cursor = new_path;

  for (size_t i = 0; path[i] != NULL; i++)
    {
      const char *item = path[i];

      /* Emit separators between items instead of stripping a trailing
         colon afterwards: with an empty PATH there is no colon to strip and
         doing so would write before the start of the buffer.  */
      if (i > 0)
        *cursor++ = ':';

      if (strncmp (item, original_store, prefix_length) == 0)
        {
          memcpy (cursor, store, store_length);
          cursor += store_length;
          item += prefix_length;
        }
      /* Otherwise ITEM is copied as is (possibly $ORIGIN).  */

      size_t item_length = strlen (item);
      memcpy (cursor, item, item_length);
      cursor += item_length;
    }

  *cursor = '\0';

  return new_path;
}
/* Return PATH1 and PATH2 joined with a colon.  When PATH1 is the empty
   string, PATH2 itself is returned and nothing is allocated; otherwise the
   result is malloc'd.  */
static char *
concat_paths (const char *path1, const char *path2)
{
  if (*path1 == '\0')
    return (char *) path2;

  size_t length1 = strlen (path1);
  size_t length2 = strlen (path2);

  /* Room for both paths, the colon, and the terminating NUL.  */
  char *joined = xmalloc (length1 + length2 + 2);

  memcpy (joined, path1, length1);
  joined[length1] = ':';
  /* Copy PATH2 together with its terminating NUL.  */
  memcpy (joined + length1 + 1, path2, length2 + 1);

  return joined;
}
/* Execute the wrapped program by invoking the loader (ld.so) directly,
   passing it the audit module and preloading libfakechroot.so.

   STORE is the actual location of the store.  The arguments that follow
   ARGV[0] are forwarded to the wrapped program.  A temporary root directory
   is populated with symlinks to the entries of "/" plus a symlink from
   ORIGINAL_STORE to STORE.  When the wrapped program could be spawned, this
   function does not return: the calling process exits with the program's
   exit status, or with 255 if it terminated abnormally.  Return only when
   the temporary root could not be created.  */
static void
exec_with_loader (const char *store, int argc, char *argv[])
{
  static const char *audit_library_path[] = LOADER_AUDIT_RUNPATH;

  /* In the expressions of the form MACRO + sizeof original_store below, the
     offset skips ORIGINAL_STORE plus one more character ('sizeof' counts the
     terminating NUL), leaving a file name relative to the store -- assuming
     each macro expands to a file name under ORIGINAL_STORE (TODO
     confirm).  */
  char *loader = concat (store,
                         PROGRAM_INTERPRETER + sizeof original_store);
  size_t loader_specific_argc = 8;
  size_t loader_argc = argc + loader_specific_argc;
  char *loader_argv[loader_argc + 1];

  loader_argv[0] = argv[0];
  loader_argv[1] = "--audit";
  loader_argv[2] = concat (store,
                           LOADER_AUDIT_MODULE + sizeof original_store);

  /* The audit module depends on libc.so and libgcc_s.so so honor
     AUDIT_LIBRARY_PATH.  Additionally, honor $LD_LIBRARY_PATH if set.  */
  loader_argv[3] = "--library-path";
  loader_argv[4] =
    concat_paths (getenv ("LD_LIBRARY_PATH") ?: "",
                  relocated_search_path (audit_library_path, store));
  loader_argv[5] = "--preload";
  loader_argv[6] = concat (store,
                           FAKECHROOT_LIBRARY + sizeof original_store);
  loader_argv[7] = concat (store,
                           "@WRAPPED_PROGRAM@" + sizeof original_store);

  /* Copy ARGV[1] up to and including ARGV[ARGC], the latter being NULL.
     The index is an 'int' like ARGC to avoid a mixed-sign comparison.  */
  for (int i = 0; i < argc; i++)
    loader_argv[i + loader_specific_argc] = argv[i + 1];

  loader_argv[loader_argc] = NULL;

  /* Set up the root directory.  Both 'strdup' and 'mkdtemp' may fail;
     NEW_ROOT must not be used then.  */
  int err;
  char *template = strdup ("/tmp/guix-exec-XXXXXX");
  char *new_root = template != NULL ? mkdtemp (template) : NULL;
  if (new_root == NULL)
    {
      fprintf (stderr, "%s: error: cannot create temporary directory: %m\n",
               argv[0]);
      free (template);
      return;
    }

  mirror_directory ("/", new_root, make_symlink);

  /* 'mirror_directory' created a symlink for the ancestor of ORIGINAL_STORE,
     typically "/gnu".  Remove that entry so we can create NEW_STORE
     below.  */
  const char *slash = strchr (original_store + 1, '/');
  const char *top = slash != NULL
    ? strndupa (original_store, slash - original_store)
    : original_store;
  char *new_store_top = concat (new_root, top);
  unlink (new_store_top);

  /* Now create the store under NEW_ROOT.  'dirname' may modify its argument
     and return a pointer into it, so work on a copy and release that copy
     once the parent directory exists.  */
  char *new_store = concat (new_root, original_store);
  char *new_store_copy = strdup (new_store);
  char *new_store_parent = dirname (new_store_copy);
  mkdir_p (new_store_parent);
  free (new_store_copy);

  err = symlink (store, new_store);
  if (err < 0)
    assert_perror (errno);

#ifdef GCONV_DIRECTORY
  /* Tell libc where to find its gconv modules.  This is necessary because
     gconv uses non-interposable 'open' calls.  */
  char *gconv_path = concat (store,
                             GCONV_DIRECTORY + sizeof original_store);
  setenv ("GCONV_PATH", gconv_path, 1);
  free (gconv_path);
#endif

  /* Presumably read by libfakechroot to find the directory to use as the
     root -- confirm against its documentation.  */
  setenv ("FAKECHROOT_BASE", new_root, 1);

  /* Become the new parent of grand-children when their parent dies.  */
  prctl (PR_SET_CHILD_SUBREAPER, 1);

  pid_t child = fork ();
  switch (child)
    {
    case 0:
      err = execv (loader, loader_argv);
      if (err < 0)
        assert_perror (errno);
      exit (EXIT_FAILURE);
      break;

    case -1:
      assert_perror (errno);
      exit (EXIT_FAILURE);
      break;

    default:
      {
        int status, status_other;
        waitpid (child, &status, 0);

        /* If there are child processes still running, leave NEW_ROOT around
           so they can still access it.  XXX: In that case NEW_ROOT is left
           behind.  */
        if (waitpid (-1 , &status_other, WNOHANG) == -1)
          {
            chdir ("/");                  /* avoid EBUSY */
            rm_rf (new_root);
          }

        free (new_root);

        close (2);                        /* flushing stderr should be silent */

        if (WIFEXITED (status))
          exit (WEXITSTATUS (status));
        else
          /* Abnormal termination cannot really be reproduced, so exit
             with 255.  */
          exit (255);
      }
    }
}
#endif
/* An execution engine: a named way of running the wrapped program.  */
struct engine
{
  /* Name under which the engine can be selected.  */
  const char *name;

  /* Entry point, called as EXEC (STORE, ARGC, ARGV).  */
  void (* exec) (const char *store, int argc, char **argv);
};
/* Make stderr fully buffered, using a static 4096-byte buffer, so that what
   is written to it is held back until the stream gets flushed.  */
static void
buffer_stderr (void)
{
  enum { STDERR_BUFFER_SIZE = 4096 };
  static char storage[STDERR_BUFFER_SIZE];

  setvbuf (stderr, storage, _IOFBF, STDERR_BUFFER_SIZE);
}
/* The default engine: choose a robust method.  User namespaces are tried
   first; when PRoot support is compiled in, PRoot is tried next.  */
static void
exec_default (const char *store, int argc, char *argv[])
{
  /* Buffer stderr so that nothing's displayed if 'exec_in_user_namespace'
     fails but 'exec_with_proot' works.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);

#ifdef PROOT_PROGRAM
  /* Reached only if the call above returned.  */
  exec_with_proot (store, argc, argv);
#endif
}
/* The "performance" engine: choose performance over robustness.  User
   namespaces are tried first; when the ld.so/libfakechroot wrapper is
   compiled in, it is tried next.  */
static void
exec_performance (const char *store, int argc, char *argv[])
{
  /* Hold error messages back while the engines are being tried.  */
  buffer_stderr ();

  exec_in_user_namespace (store, argc, argv);

#if HAVE_EXEC_WITH_LOADER
  /* Reached only if the call above returned.  */
  exec_with_loader (store, argc, argv);
#endif
}
/* Table of supported engines, terminated by an entry whose name is NULL.
   Which entries are present depends on the features compiled in.  */
static const struct engine engines[] =
  {
    { .name = "default",     .exec = exec_default },
    { .name = "performance", .exec = exec_performance },
    { .name = "userns",      .exec = exec_in_user_namespace },
#ifdef PROOT_PROGRAM
    { .name = "proot",       .exec = exec_with_proot },
#endif
#if HAVE_EXEC_WITH_LOADER
    { .name = "fakechroot",  .exec = exec_with_loader },
#endif
    { .name = NULL,          .exec = NULL }
  };
/* Return the "execution engine" to use: the one named by the
   GUIX_EXECUTION_ENGINE environment variable or, when that variable is unset
   or names an unsupported engine, the one called "default".  In the latter
   case a message is printed to stderr.  */
static const struct engine *
execution_engine (void)
{
  const char *wanted = getenv ("GUIX_EXECUTION_ENGINE");

  if (wanted == NULL)
    wanted = "default";

  for (;;)
    {
      /* Scan the table until WANTED or the terminating entry is found.  */
      const struct engine *candidate = engines;
      while (candidate->name != NULL
             && strcmp (candidate->name, wanted) != 0)
        candidate++;

      if (candidate->name != NULL)
        return candidate;

      fprintf (stderr, "%s: unsupported Guix execution engine; ignoring\n",
               wanted);

      /* Search again, this time for the default engine.  */
      wanted = "default";
    }
}
/* Entry point of the wrapper.  Determine where the store actually is from
   this executable's own file name, then either execute @WRAPPED_PROGRAM@
   directly, when it is reachable at its original location, or hand over to
   the selected execution engine.  Return EXIT_FAILURE if the wrapped program
   could not be run.  */
int
main (int argc, char *argv[])
{
  char self[PATH_MAX];
  ssize_t size = readlink ("/proc/self/exe", self, sizeof self - 1);

  /* Check explicitly rather than with 'assert' so that the check survives
     NDEBUG.  */
  if (size <= 0)
    {
      fprintf (stderr, "%s: error: cannot read '/proc/self/exe': %m\n",
               argv[0]);
      return EXIT_FAILURE;
    }

  /* 'readlink' does not NUL-terminate its result; do it before SELF is used
     as a string.  */
  self[size] = '\0';

  /* SELF is something like "/home/ludo/.local/gnu/store/…-foo/bin/ls" and we
     want to extract "/home/ludo/.local/gnu/store".  WRAPPER_PROGRAM is
     presumably this wrapper's file name under ORIGINAL_STORE (TODO
     confirm).  The unsigned arithmetic wraps around when SELF is too short,
     which the bounds check below catches.  */
  size_t self_length = (size_t) size;
  size_t index = self_length
    - strlen (WRAPPER_PROGRAM) + strlen (original_store);
  if (index > self_length)
    {
      fprintf (stderr, "%s: error: cannot determine the store from '%s'\n",
               argv[0], self);
      return EXIT_FAILURE;
    }

  char *store = strdup (self);
  if (store == NULL)
    {
      fprintf (stderr, "%s: error: memory exhausted\n", argv[0]);
      return EXIT_FAILURE;
    }
  store[index] = '\0';

  struct stat statbuf;

  /* If STORE is already at the "right" place, we can execute
     @WRAPPED_PROGRAM@ right away.  This is not just an optimization: it's
     needed when running one of these wrappers from within an unshare'd
     namespace, because 'unshare' fails with EPERM in that context.  */
  if (strcmp (store, original_store) != 0
      && lstat ("@WRAPPED_PROGRAM@", &statbuf) != 0)
    {
      const struct engine *engine = execution_engine ();
      engine->exec (store, argc, argv);

      /* If we reach this point, that's because ENGINE failed to do the
         job.  */
      fprintf (stderr, "\
This may be because \"user namespaces\" are not supported on this system.\n\
Consequently, we cannot run '@WRAPPED_PROGRAM@',\n\
unless you move it to the '@STORE_DIRECTORY@' directory.\n\
\n\
Please refer to the 'guix pack' documentation for more information.\n");
      return EXIT_FAILURE;
    }

  /* The executable is available under @STORE_DIRECTORY@, so we can now
     execute it.  */
  int err = execv ("@WRAPPED_PROGRAM@", argv);
  if (err < 0)
    assert_perror (errno);

  return EXIT_FAILURE;
}