Commit 7027ff61 authored by Lennart Poettering

nspawn: introduce the new /machine/ tree in the cgroup tree and move containers there

Containers will now carry a label (normally derived from the root
directory name, but configurable by the user), and the container's root
cgroup is /machine/<label>. This label is called "machine name", and can
cover both containers and VMs (as soon as libvirt also makes use of
/machine/).

libsystemd-login can be used to query the machine name from a process.

This patch also includes numerous clean-ups for the cgroup code.
parent cec4ead9
......@@ -47,7 +47,8 @@
<refname>sd_pid_get_unit</refname>
<refname>sd_pid_get_user_unit</refname>
<refname>sd_pid_get_owner_uid</refname>
<refpurpose>Determine session, service or owner of a session of a specific PID</refpurpose>
<refname>sd_pid_get_machine_name</refname>
<refpurpose>Determine session, service, owner of a session or container/VM of a specific PID</refpurpose>
</refnamediv>
<refsynopsisdiv>
......@@ -77,6 +78,12 @@
<paramdef>pid_t <parameter>pid</parameter></paramdef>
<paramdef>uid_t* <parameter>uid</parameter></paramdef>
</funcprototype>
<funcprototype>
<funcdef>int <function>sd_pid_get_machine_name</function></funcdef>
<paramdef>pid_t <parameter>pid</parameter></paramdef>
<paramdef>char** <parameter>name</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
......@@ -108,7 +115,7 @@
function will fail. (More specifically: this call will
not work for processes that are part of user units,
use <function>sd_pid_get_user_unit()</function> for
that.) The returned string needs to be freed with the
that.) The returned string needs to be freed with the
libc
<citerefentry><refentrytitle>free</refentrytitle><manvolnum>3</manvolnum></citerefentry>
call after use.</para>
......@@ -131,6 +138,14 @@
and not being a shared process of a user this function
will fail.</para>
<para><function>sd_pid_get_machine_name()</function> may
be used to determine the name of the VM or container
the process is a member of. The machine name is a short string,
suitable for usage in file system paths. The returned
string needs to be freed with the libc
<citerefentry><refentrytitle>free</refentrytitle><manvolnum>3</manvolnum></citerefentry>
call after use.</para>
<para>If the <literal>pid</literal> parameter of any
of these functions is passed as 0 the operation is
executed for the calling process.</para>
......@@ -149,10 +164,11 @@
<para>The <function>sd_pid_get_session()</function>,
<function>sd_pid_get_unit()</function>,
<function>sd_pid_get_user_unit()</function>, and
<function>sd_pid_get_owner_uid()</function> interfaces
are available as shared library, which can be compiled
and linked to with the
<function>sd_pid_get_user_unit()</function>,
<function>sd_pid_get_owner_uid()</function> and
<function>sd_pid_get_machine_name()</function>
interfaces are available as shared library, which can
be compiled and linked to with the
<literal>libsystemd-login</literal>
<citerefentry><refentrytitle>pkg-config</refentrytitle><manvolnum>1</manvolnum></citerefentry>
file.</para>
......
......@@ -202,6 +202,21 @@
</para></listitem>
</varlistentry>
<varlistentry>
<term><option>-M</option></term>
<term><option>--machine=</option></term>
<listitem><para>Sets the machine name
for this container. This name may be
used to identify this container on the
host, and is used to initialize the
container's hostname (which the
container can choose to override,
however). If not specified, the last
component of the root directory of the
container is used.</para></listitem>
</varlistentry>
<varlistentry>
<term><option>--uuid=</option></term>
......
......@@ -171,19 +171,14 @@ int main(int argc, char *argv[]) {
arg_kernel_threads, output_flags);
} else {
char _cleanup_free_ *root = NULL;
const char *t = NULL;
r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &root);
if (r < 0)
t = "/";
else {
if (endswith(root, "/system"))
root[strlen(root)-7] = 0;
t = root[0] ? root : "/";
r = cg_get_root_path(&root);
if (r < 0) {
log_error("Failed to get root path: %s", strerror(-r));
goto finish;
}
r = show_cgroup(SYSTEMD_CGROUP_CONTROLLER, t, NULL, 0,
r = show_cgroup(SYSTEMD_CGROUP_CONTROLLER, root, NULL, 0,
arg_kernel_threads, output_flags);
}
}
......
......@@ -320,8 +320,9 @@ int cgroup_bonding_is_empty_list(CGroupBonding *first) {
int manager_setup_cgroup(Manager *m) {
_cleanup_free_ char *current = NULL, *path = NULL;
char suffix_buffer[sizeof("/systemd-") + DECIMAL_STR_MAX(pid_t)];
const char *suffix;
int r;
char suffix[sizeof("/systemd-") + DECIMAL_STR_MAX(pid_t)];
assert(m);
......@@ -332,17 +333,17 @@ int manager_setup_cgroup(Manager *m) {
}
/* 1. Determine hierarchy */
r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 0, &current);
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, 0, &current);
if (r < 0) {
log_error("Cannot determine cgroup we are running in: %s", strerror(-r));
return r;
}
if (m->running_as == SYSTEMD_SYSTEM)
strcpy(suffix, "/system");
suffix = "/system";
else {
snprintf(suffix, sizeof(suffix), "/systemd-%lu", (unsigned long) getpid());
char_array_0(suffix);
sprintf(suffix_buffer, "/systemd-%lu", (unsigned long) getpid());
suffix = suffix_buffer;
}
free(m->cgroup_hierarchy);
......@@ -350,11 +351,14 @@ int manager_setup_cgroup(Manager *m) {
/* We probably got reexecuted and can continue to use our root cgroup */
m->cgroup_hierarchy = current;
current = NULL;
} else {
/* We need a new root cgroup */
m->cgroup_hierarchy = NULL;
if (asprintf(&m->cgroup_hierarchy, "%s%s", streq(current, "/") ? "" : current, suffix) < 0)
if (streq(current, "/"))
m->cgroup_hierarchy = strdup(suffix);
else
m->cgroup_hierarchy = strappend(current, suffix);
if (!m->cgroup_hierarchy)
return log_oom();
}
......@@ -509,7 +513,7 @@ Unit* cgroup_unit_by_pid(Manager *m, pid_t pid) {
if (pid <= 1)
return NULL;
if (cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &group) < 0)
if (cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &group) < 0)
return NULL;
l = hashmap_get(m->cgroup_bondings, group);
......
......@@ -1034,7 +1034,7 @@ int bus_unit_cgroup_unset(Unit *u, DBusMessageIter *iter) {
unit_remove_drop_in(u, runtime, controller);
/* Try to migrate the old group away */
if (cg_get_by_pid(controller, 0, &target) >= 0)
if (cg_pid_get_path(controller, 0, &target) >= 0)
cgroup_bonding_migrate_to(u->cgroup_bondings, target, false);
cgroup_bonding_free(b, true);
......
......@@ -420,36 +420,6 @@ void server_vacuum(Server *s) {
s->cached_available_space_timestamp = 0;
}
static char *shortened_cgroup_path(pid_t pid) {
int r;
char _cleanup_free_ *process_path = NULL, *init_path = NULL;
char *path;
assert(pid > 0);
r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &process_path);
if (r < 0)
return NULL;
r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, 1, &init_path);
if (r < 0)
return NULL;
if (endswith(init_path, "/system"))
init_path[strlen(init_path) - 7] = 0;
else if (streq(init_path, "/"))
init_path[0] = 0;
if (startswith(process_path, init_path)) {
path = strdup(process_path + strlen(init_path));
} else {
path = process_path;
process_path = NULL;
}
return path;
}
bool shall_try_append_again(JournalFile *f, int r) {
/* -E2BIG Hit configured limit
......@@ -620,8 +590,8 @@ static void dispatch_message_real(
IOVEC_SET_STRING(iovec[n++], audit_loginuid);
#endif
t = shortened_cgroup_path(ucred->pid);
if (t) {
r = cg_pid_get_path(NULL, ucred->pid, &t);
if (r >= 0) {
cgroup = strappend("_SYSTEMD_CGROUP=", t);
free(t);
......@@ -630,7 +600,8 @@ static void dispatch_message_real(
}
#ifdef HAVE_LOGIND
if (sd_pid_get_session(ucred->pid, &t) >= 0) {
r = cg_pid_get_session(ucred->pid, &t);
if (r >= 0) {
session = strappend("_SYSTEMD_SESSION=", t);
free(t);
......@@ -773,7 +744,7 @@ void server_dispatch_message(
const char *unit_id,
int priority) {
int rl;
int rl, r;
char _cleanup_free_ *path = NULL;
char *c;
......@@ -789,8 +760,8 @@ void server_dispatch_message(
if (!ucred)
goto finish;
path = shortened_cgroup_path(ucred->pid);
if (!path)
r = cg_pid_get_path_shifted(ucred->pid, NULL, &path);
if (r < 0)
goto finish;
/* example: /user/lennart/3/foobar
......
......@@ -68,4 +68,5 @@ global:
LIBSYSTEMD_LOGIN_202 {
global:
sd_pid_get_user_unit;
sd_pid_get_machine_name;
} LIBSYSTEMD_LOGIN_201;
......@@ -506,7 +506,7 @@ static int bus_manager_create_session(Manager *m, DBusMessage *message, DBusMess
dbus_message_iter_get_basic(&iter, &kill_processes);
r = cg_pid_get_cgroup(leader, NULL, &cgroup);
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, leader, &cgroup);
if (r < 0)
goto fail;
......
......@@ -436,7 +436,6 @@ static int session_create_one_group(Session *s, const char *controller, const ch
int r;
assert(s);
assert(controller);
assert(path);
if (s->leader > 0) {
......
......@@ -1100,21 +1100,18 @@ int manager_get_user_by_cgroup(Manager *m, const char *cgroup, User **user) {
}
int manager_get_session_by_pid(Manager *m, pid_t pid, Session **session) {
char *p;
_cleanup_free_ char *p = NULL;
int r;
assert(m);
assert(pid >= 1);
assert(session);
r = cg_get_by_pid(SYSTEMD_CGROUP_CONTROLLER, pid, &p);
r = cg_pid_get_path(SYSTEMD_CGROUP_CONTROLLER, pid, &p);
if (r < 0)
return r;
r = manager_get_session_by_cgroup(m, p, session);
free(p);
return r;
return manager_get_session_by_cgroup(m, p, session);
}
void manager_cgroup_notify_empty(Manager *m, const char *cgroup) {
......
......@@ -33,51 +33,19 @@
#include "fileio.h"
_public_ int sd_pid_get_session(pid_t pid, char **session) {
int r;
char *cgroup, *p;
if (pid < 0)
return -EINVAL;
if (!session)
return -EINVAL;
r = cg_pid_get_cgroup(pid, NULL, &cgroup);
if (r < 0)
return r;
if (!startswith(cgroup, "/user/")) {
free(cgroup);
return -ENOENT;
}
p = strchr(cgroup + 6, '/');
if (!p) {
free(cgroup);
return -ENOENT;
}
p++;
if (startswith(p, "shared/") || streq(p, "shared")) {
free(cgroup);
return -ENOENT;
}
p = strndup(p, strcspn(p, "/"));
free(cgroup);
if (!p)
return -ENOMEM;
*session = p;
return 0;
return cg_pid_get_session(pid, session);
}
_public_ int sd_pid_get_unit(pid_t pid, char **unit) {
if (pid < 0)
return -EINVAL;
if (!unit)
return -EINVAL;
......@@ -88,13 +56,22 @@ _public_ int sd_pid_get_user_unit(pid_t pid, char **unit) {
if (pid < 0)
return -EINVAL;
if (!unit)
return -EINVAL;
return cg_pid_get_user_unit(pid, unit);
}
/* Public entry point: validates the arguments and forwards the lookup to
 * cg_pid_get_machine_name().
 *
 * Returns -EINVAL if 'pid' is negative or 'name' is NULL; otherwise returns
 * whatever cg_pid_get_machine_name() returns. */
_public_ int sd_pid_get_machine_name(pid_t pid, char **name) {
        /* Reject a negative PID or a missing output pointer up front. */
        if (pid < 0 || !name)
                return -EINVAL;

        return cg_pid_get_machine_name(pid, name);
}
_public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) {
int r;
char *root, *cgroup, *p, *cc;
......@@ -106,7 +83,7 @@ _public_ int sd_pid_get_owner_uid(pid_t pid, uid_t *uid) {
if (!uid)
return -EINVAL;
r = cg_pid_get_cgroup(pid, &root, &cgroup);
r = cg_pid_get_path_shifted(pid, &root, &cgroup);
if (r < 0)
return r;
......
This diff is collapsed.
......@@ -40,9 +40,6 @@ int cg_create(const char *controller, const char *path, const char *suffix) {
_cleanup_free_ char *fs = NULL;
int r;
assert(controller);
assert(path);
r = cg_get_path_and_check(controller, path, suffix, &fs);
if (r < 0)
return r;
......@@ -65,8 +62,6 @@ int cg_create(const char *controller, const char *path, const char *suffix) {
int cg_create_and_attach(const char *controller, const char *path, pid_t pid) {
int r, q;
assert(controller);
assert(path);
assert(pid >= 0);
r = cg_create(controller, path, NULL);
......
......@@ -104,20 +104,20 @@ static void show_pid_array(int pids[], unsigned n_pids, const char *prefix, unsi
static int show_cgroup_one_by_path(const char *path, const char *prefix, unsigned n_columns, bool more, bool kernel_threads, OutputFlags flags) {
char *fn;
FILE *f;
_cleanup_fclose_ FILE *f = NULL;
size_t n = 0, n_allocated = 0;
pid_t *pids = NULL;
char *p;
_cleanup_free_ pid_t *pids = NULL;
char *p = NULL;
pid_t pid;
int r;
r = cg_fix_path(path, &p);
r = cg_mangle_path(path, &p);
if (r < 0)
return r;
r = asprintf(&fn, "%s/cgroup.procs", p);
fn = strappend(p, "/cgroup.procs");
free(p);
if (r < 0)
if (!fn)
return -ENOMEM;
f = fopen(fn, "re");
......@@ -136,10 +136,8 @@ static int show_cgroup_one_by_path(const char *path, const char *prefix, unsigne
n_allocated = MAX(16U, n*2U);
npids = realloc(pids, sizeof(pid_t) * n_allocated);
if (!npids) {
r = -ENOMEM;
goto finish;
}
if (!npids)
return -ENOMEM;
pids = npids;
}
......@@ -149,26 +147,18 @@ static int show_cgroup_one_by_path(const char *path, const char *prefix, unsigne
}
if (r < 0)
goto finish;
return r;
if (n > 0)
show_pid_array(pids, n, prefix, n_columns, false, more, kernel_threads, flags);
r = 0;
finish:
free(pids);
if (f)
fclose(f);
return r;
return 0;
}
int show_cgroup_by_path(const char *path, const char *prefix, unsigned n_columns, bool kernel_threads, OutputFlags flags) {
DIR *d;
char *last = NULL;
char *p1 = NULL, *p2 = NULL, *fn = NULL, *gn = NULL;
_cleanup_free_ char *fn = NULL, *p1 = NULL, *last = NULL, *p2 = NULL;
_cleanup_closedir_ DIR *d = NULL;
char *gn = NULL;
bool shown_pids = false;
int r;
......@@ -180,30 +170,24 @@ int show_cgroup_by_path(const char *path, const char *prefix, unsigned n_columns
if (!prefix)
prefix = "";
r = cg_fix_path(path, &fn);
r = cg_mangle_path(path, &fn);
if (r < 0)
return r;
d = opendir(fn);
if (!d) {
free(fn);
if (!d)
return -errno;
}
while ((r = cg_read_subgroup(d, &gn)) > 0) {
char *k;
_cleanup_free_ char *k = NULL;
r = asprintf(&k, "%s/%s", fn, gn);
k = strjoin(fn, "/", gn, NULL);
free(gn);
if (r < 0) {
r = -ENOMEM;
goto finish;
}
if (!k)
return -ENOMEM;
if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k, false) > 0) {
free(k);
if (!(flags & OUTPUT_SHOW_ALL) && cg_is_empty_recursive(NULL, k, false) > 0)
continue;
}
if (!shown_pids) {
show_cgroup_one_by_path(path, prefix, n_columns, true, kernel_threads, flags);
......@@ -216,11 +200,8 @@ int show_cgroup_by_path(const char *path, const char *prefix, unsigned n_columns
if (!p1) {
p1 = strappend(prefix, draw_special_char(DRAW_TREE_VERT));
if (!p1) {
free(k);
r = -ENOMEM;
goto finish;
}
if (!p1)
return -ENOMEM;
}
show_cgroup_by_path(last, p1, n_columns-2, kernel_threads, flags);
......@@ -228,10 +209,11 @@ int show_cgroup_by_path(const char *path, const char *prefix, unsigned n_columns
}
last = k;
k = NULL;
}
if (r < 0)
goto finish;
return r;
if (!shown_pids)
show_cgroup_one_by_path(path, prefix, n_columns, !!last, kernel_threads, flags);
......@@ -242,43 +224,27 @@ int show_cgroup_by_path(const char *path, const char *prefix, unsigned n_columns
if (!p2) {
p2 = strappend(prefix, " ");
if (!p2) {
r = -ENOMEM;
goto finish;
}
if (!p2)
return -ENOMEM;
}
show_cgroup_by_path(last, p2, n_columns-2, kernel_threads, flags);
}
r = 0;
finish:
free(p1);
free(p2);
free(last);
free(fn);
closedir(d);
return r;
return 0;
}
int show_cgroup(const char *controller, const char *path, const char *prefix, unsigned n_columns, bool kernel_threads, OutputFlags flags) {
char *p;
_cleanup_free_ char *p = NULL;
int r;
assert(controller);
assert(path);
r = cg_get_path(controller, path, NULL, &p);
if (r < 0)
return r;
r = show_cgroup_by_path(p, prefix, n_columns, kernel_threads, flags);
free(p);
return r;
return show_cgroup_by_path(p, prefix, n_columns, kernel_threads, flags);
}
static int show_extra_pids(const char *controller, const char *path, const char *prefix, unsigned n_columns, const pid_t pids[], unsigned n_pids, OutputFlags flags) {
......@@ -295,8 +261,7 @@ static int show_extra_pids(const char *controller, const char *path, const char
if (n_columns <= 0)
n_columns = columns();
if (!prefix)
prefix = "";
prefix = strempty(prefix);
copy = new(pid_t, n_pids);
if (!copy)
......@@ -305,7 +270,7 @@ static int show_extra_pids(const char *controller, const char *path, const char
for (i = 0, j = 0; i < n_pids; i++) {
char _cleanup_free_ *k = NULL;
r = cg_get_by_pid(controller, pids[i], &k);
r = cg_pid_get_path(controller, pids[i], &k);
if (r < 0)
return r;
......@@ -323,7 +288,6 @@ static int show_extra_pids(const char *controller, const char *path, const char
int show_cgroup_and_extra(const char *controller, const char *path, const char *prefix, unsigned n_columns, bool kernel_threads, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags) {
int r;
assert(controller);
assert(path);
r = show_cgroup(controller, path, prefix, n_columns, kernel_threads, flags);
......@@ -334,8 +298,8 @@ int show_cgroup_and_extra(const char *controller, const char *path, const char *
}
int show_cgroup_and_extra_by_spec(const char *spec, const char *prefix, unsigned n_columns, bool kernel_threads, const pid_t extra_pids[], unsigned n_extra_pids, OutputFlags flags) {
int r;
_cleanup_free_ char *controller = NULL, *path = NULL;
int r;
assert(spec);
......
This diff is collapsed.
......@@ -44,11 +44,12 @@ int cg_migrate_recursive(const char *cfrom, const char *pfrom, const char *cto,
int cg_split_spec(const char *spec, char **controller, char **path);
int cg_join_spec(const char *controller, const char *path, char **spec);
int cg_fix_path(const char *path, char **result);
int cg_mangle_path(const char *path, char **result);
int cg_get_path(const char *controller, const char *path, const char *suffix, char **fs);
int cg_get_path_and_check(const char *controller, const char *path, const char *suffix, char **fs);
int cg_get_by_pid(const char *controller, pid_t pid, char **path);