Commit ab1f0633 authored by Lennart Poettering's avatar Lennart Poettering
Browse files

exec: optionally apply cgroup attributes to the cgroups we create

parent 5ed27dbd
......@@ -627,7 +627,8 @@ libsystemd_core_la_SOURCES = \
src/condition.c \
src/dbus-common.c \
src/sd-daemon.c \
src/install.c
src/install.c \
src/cgroup-attr.c
nodist_libsystemd_core_la_SOURCES = \
src/load-fragment-gperf.c \
......
......@@ -132,7 +132,7 @@
<varlistentry>
<term><option>--directory=</option></term>
<term><option>--D</option></term>
<term><option>-D</option></term>
<listitem><para>Directory to use as
file system root for the namespace
......@@ -207,7 +207,7 @@
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>chroot</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>debootstrap</refentrytitle><manvolnum>8</manvolnum></citerefentry>
<citerefentry><refentrytitle>debootstrap</refentrytitle><manvolnum>8</manvolnum></citerefentry>,
<citerefentry><refentrytitle>mock</refentrytitle><manvolnum>1</manvolnum></citerefentry>
</para>
</refsect1>
......
......@@ -629,18 +629,6 @@
for details.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ControlGroupModify=</varname></term>
<listitem><para>Takes a boolean
argument. If true, the control groups
created for this unit will be owned by
ther user specified with
<varname>User=</varname> (and the
configured group), and he can create
subgroups as well as add processes to
the group.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>CapabilityBoundingSet=</varname></term>
......@@ -718,9 +706,9 @@
where "cpu" identifies the kernel
control group controller used, and
<filename>/foo/bar</filename> is the
control group path. The controller name
and ":" may be omitted in which case
the named systemd control group
control group path. The controller
name and ":" may be omitted in which
case the named systemd control group
hierarchy is implied. Alternatively,
the path and ":" may be omitted, in
which case the default control group
......@@ -728,20 +716,138 @@
option may be used to place executed
processes in arbitrary groups in
arbitrary hierarchies -- which can be
configured externally with additional execution limits. By default
systemd will place all executed
processes in separate per-unit control
groups (named after the unit) in the
systemd named hierarchy. Since every
process can be in one group per
hierarchy only overriding the control group
path in the named systemd hierarchy
will disable automatic placement in
the default group. For details about control
groups see <ulink
configured externally with additional
execution limits. By default systemd
will place all executed processes in
separate per-unit control groups
(named after the unit) in the systemd
named hierarchy. Since every process
can be in only one group per hierarchy,
overriding the control group path in
the named systemd hierarchy will
disable automatic placement in the
default group. This option is
primarily intended to place executed
processes in specific paths in
specific kernel controller
hierarchies. It is however not
recommended to manipulate the service
control group path in the systemd
named hierarchy. For details about
control groups see <ulink
url="http://www.kernel.org/doc/Documentation/cgroups/cgroups.txt">cgroups.txt</ulink>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ControlGroupModify=</varname></term>
<listitem><para>Takes a boolean
argument. If true, the control groups
created for this unit will be owned by
the user specified with
<varname>User=</varname> (and the
appropriate group), and he/she can create
subgroups as well as add processes to
the group.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ControlGroupAttribute=</varname></term>
<listitem><para>Set a specific control
group attribute for executed
processes, and (if needed) add the
executed processes to a cgroup in the
hierarchy of the controller the
attribute belongs to. Takes two
space-separated arguments: the
attribute name (syntax is
<literal>cpu.shares</literal> where
<literal>cpu</literal> refers to a
specific controller and
<literal>shares</literal> to the
attribute name), and the attribute
value. Example:
<literal>ControlGroupAttribute=cpu.shares
512</literal>. If this option is used
for an attribute that belongs to a
kernel controller hierarchy the unit
is not already configured to be added
to (for example via the
<literal>ControlGroup=</literal>
option) then the unit will be added to
the controller and the default unit
cgroup path is implied. Thus, using
<varname>ControlGroupAttribute=</varname>
is in most cases sufficient to make use
of control group enforcements;
explicit
<varname>ControlGroup=</varname> settings are
only necessary in case the implied
default control group path for a
service is not desirable. For details
about control group attributes see
<ulink
url="http://www.kernel.org/doc/Documentation/cgroups/cgroups.txt">cgroups.txt</ulink>. This
option may appear more than once, in
order to set multiple control group
attributes.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>CPUShares=</varname></term>
<listitem><para>Assign the specified
overall CPU time shares to the processes executed. Takes
an integer value. This controls the
<literal>cpu.shares</literal> control
group attribute. For details about
this control group attribute see <ulink
url="http://www.kernel.org/doc/Documentation/scheduler/sched-design-CFS.txt">sched-design-CFS.txt</ulink>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>MemoryLimit=</varname></term>
<term><varname>MemorySoftLimit=</varname></term>
<listitem><para>Limit the overall memory usage
of the executed processes to a certain
size. Takes a memory size in bytes. If
the value is suffixed with K, M, G or
T the specified memory size is parsed
as Kilobytes, Megabytes, Gigabytes
or Terabytes, respectively (to the base
1024). This controls the
<literal>memory.limit_in_bytes</literal>
and
<literal>memory.soft_limit_in_bytes</literal>
control group attributes. For details
about these control group attributes
see <ulink
url="http://www.kernel.org/doc/Documentation/cgroups/memory.txt">memory.txt</ulink>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>DeviceAllow=</varname></term>
<term><varname>DeviceDeny=</varname></term>
<listitem><para>Control access to
specific device nodes by the executed processes. Takes two
space-separated strings: a device node
path (such as
<filename>/dev/null</filename>)
followed by a combination of r, w, m
to control reading, writing, or
creating of the specific device node
by the unit. This controls the
<literal>devices.allow</literal>
and
<literal>devices.deny</literal>
control group attributes. For details
about these control group attributes
see <ulink
url="http://www.kernel.org/doc/Documentation/cgroups/devices.txt">devices.txt</ulink>.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>ReadWriteDirectories=</varname></term>
<term><varname>ReadOnlyDirectories=</varname></term>
......
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2011 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include "cgroup-attr.h"
#include "cgroup-util.h"
#include "list.h"
/* Applies a single attribute to the bonding in list b whose controller
 * matches a->controller.
 *
 * Returns 0 without doing anything when no matching bonding exists. If the
 * attribute carries a map callback, the callback's output string is written
 * instead of the configured value. A failed write is logged as a warning and
 * its negative error code is returned; otherwise the result of the write is
 * returned. */
int cgroup_attribute_apply(CGroupAttribute *a, CGroupBonding *b) {
        CGroupBonding *bonding;
        char *mapped = NULL, *file = NULL;
        const char *payload;
        int r;

        assert(a);

        bonding = cgroup_bonding_find_list(b, a->controller);
        if (!bonding)
                return 0;

        if (a->map_callback) {
                r = a->map_callback(a->controller, a->name, a->value, &mapped);
                if (r < 0)
                        return r;
        }

        /* Prefer the translated value when the callback produced one */
        payload = mapped ? mapped : a->value;

        r = cg_get_path(a->controller, bonding->path, a->name, &file);
        if (r >= 0) {
                r = write_one_line_file(file, payload);
                if (r < 0)
                        log_warning("Failed to write '%s' to %s: %s", payload, file, strerror(-r));

                free(file);
        }

        free(mapped);
        return r;
}
/* Applies every attribute of the list starting at first to the bonding list
 * b. All attributes are attempted even if an earlier one fails; the value
 * returned is the first non-zero result seen, or 0 if there was none. */
int cgroup_attribute_apply_list(CGroupAttribute *first, CGroupBonding *b) {
        CGroupAttribute *attr;
        int result = 0;

        LIST_FOREACH(by_unit, attr, first) {
                int k = cgroup_attribute_apply(attr, b);

                /* Remember only the earliest non-zero outcome */
                if (result == 0)
                        result = k;
        }

        return result;
}
/* Searches the list starting at first for an attribute whose controller and
 * name both equal the given strings. Returns the first match, or NULL when
 * the list holds no such attribute. */
CGroupAttribute *cgroup_attribute_find_list(CGroupAttribute *first, const char *controller, const char *name) {
        CGroupAttribute *candidate;

        assert(controller);
        assert(name);

        LIST_FOREACH(by_unit, candidate, first) {
                if (!streq(candidate->controller, controller))
                        continue;

                if (!streq(candidate->name, name))
                        continue;

                return candidate;
        }

        return NULL;
}
/* Releases one attribute: its three strings and then the structure itself.
 * The attribute is not unlinked from any list here. */
static void cgroup_attribute_free(CGroupAttribute *a) {
        assert(a);

        free(a->value);
        free(a->name);
        free(a->controller);

        free(a);
}
/* Frees every attribute of the list starting at first. The _SAFE iterator is
 * used because each element is destroyed while the list is being walked. */
void cgroup_attribute_free_list(CGroupAttribute *first) {
        CGroupAttribute *cur, *next;

        LIST_FOREACH_SAFE(by_unit, cur, next, first) {
                cgroup_attribute_free(cur);
        }
}
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
#ifndef foocgroupattrhfoo
#define foocgroupattrhfoo
/***
This file is part of systemd.
Copyright 2011 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
typedef struct CGroupAttribute CGroupAttribute;
/* Optional translation hook: receives the attribute's controller, name and
 * configured value and, on success, stores a newly allocated string in *ret
 * that is written to the attribute file in place of the configured value
 * (the caller frees it). A negative return value signals an error. */
typedef int (*CGroupAttributeMapCallback)(const char *controller, const char*name, const char *value, char **ret);
#include "unit.h"
#include "cgroup.h"
/* One control group attribute setting attached to a unit. */
struct CGroupAttribute {
/* Controller the attribute belongs to; freed together with the attribute */
char *controller;
/* Attribute name, used as the file name below the cgroup path; freed together with the attribute */
char *name;
/* Configured value; freed together with the attribute */
char *value;
/* May be NULL, in which case value is written verbatim */
CGroupAttributeMapCallback map_callback;
/* List linkage walked by the *_list() functions below */
LIST_FIELDS(CGroupAttribute, by_unit);
};
/* Writes attribute a to the bonding in list b matching a's controller; returns 0 if there is none */
int cgroup_attribute_apply(CGroupAttribute *a, CGroupBonding *b);
/* Applies all attributes in the list; returns the first non-zero result, else 0 */
int cgroup_attribute_apply_list(CGroupAttribute *first, CGroupBonding *b);
/* Returns the first attribute with the given controller and name, or NULL */
CGroupAttribute *cgroup_attribute_find_list(CGroupAttribute *first, const char *controller, const char *name);
/* Frees every attribute in the list */
void cgroup_attribute_free_list(CGroupAttribute *first);
#endif
......@@ -41,8 +41,11 @@ int cgroup_bonding_realize(CGroupBonding *b) {
if (b->realized)
return 0;
if ((r = cg_create(b->controller, b->path)) < 0)
r = cg_create(b->controller, b->path);
if (r < 0) {
log_warning("Failed to create cgroup %s:%s: %s", b->controller, b->path, strerror(-r));
return r;
}
b->realized = true;
......
......@@ -930,6 +930,7 @@ int exec_spawn(ExecCommand *command,
bool apply_tty_stdin,
bool confirm_spawn,
CGroupBonding *cgroup_bondings,
CGroupAttribute *cgroup_attributes,
pid_t *ret) {
pid_t pid;
......@@ -973,9 +974,11 @@ int exec_spawn(ExecCommand *command,
log_debug("About to execute: %s", line);
free(line);
if (cgroup_bondings)
if ((r = cgroup_bonding_realize_list(cgroup_bondings)))
goto fail_parent;
r = cgroup_bonding_realize_list(cgroup_bondings);
if (r < 0)
goto fail_parent;
cgroup_attribute_apply_list(cgroup_attributes, cgroup_bondings);
if ((pid = fork()) < 0) {
r = -errno;
......
......@@ -35,6 +35,7 @@ typedef struct ExecContext ExecContext;
#include <sched.h>
struct CGroupBonding;
struct CGroupAttribute;
#include "list.h"
#include "util.h"
......@@ -187,6 +188,7 @@ int exec_spawn(ExecCommand *command,
bool apply_tty_stdin,
bool confirm_spawn,
struct CGroupBonding *cgroup_bondings,
struct CGroupAttribute *cgroup_attributes,
pid_t *ret);
void exec_command_done(ExecCommand *c);
......
......@@ -70,7 +70,7 @@ int kmod_setup(void) {
command.argv = (char**) cmdline;
exec_context_init(&context);
r = exec_spawn(&command, NULL, &context, NULL, 0, NULL, false, false, false, false, NULL, &pid);
r = exec_spawn(&command, NULL, &context, NULL, 0, NULL, false, false, false, false, NULL, NULL, &pid);
exec_context_done(&context);
if (r < 0) {
......
......@@ -64,7 +64,13 @@ $1.LimitMSGQUEUE, config_parse_limit, RLIMIT_MSGQ
$1.LimitNICE, config_parse_limit, RLIMIT_NICE, offsetof($1, exec_context.rlimit)
$1.LimitRTPRIO, config_parse_limit, RLIMIT_RTPRIO, offsetof($1, exec_context.rlimit)
$1.LimitRTTIME, config_parse_limit, RLIMIT_RTTIME, offsetof($1, exec_context.rlimit)
$1.ControlGroup, config_parse_unit_cgroup, 0, offsetof($1, exec_context)
$1.ControlGroup, config_parse_unit_cgroup, 0, 0
$1.ControlGroupAttribute, config_parse_unit_cgroup_attr, 0, 0
$1.CPUShares, config_parse_unit_cpu_shares, 0, 0
$1.MemoryLimit, config_parse_unit_memory_limit, 0, 0
$1.MemorySoftLimit, config_parse_unit_memory_limit, 0, 0
$1.DeviceAllow, config_parse_unit_device_allow, 0, 0
$1.DeviceDeny, config_parse_unit_device_allow, 0, 0
$1.ReadWriteDirectories, config_parse_path_strv, 0, offsetof($1, exec_context.read_write_dirs)
$1.ReadOnlyDirectories, config_parse_path_strv, 0, offsetof($1, exec_context.read_only_dirs)
$1.InaccessibleDirectories, config_parse_path_strv, 0, offsetof($1, exec_context.inaccessible_dirs)
......
......@@ -1639,6 +1639,201 @@ int config_parse_unit_condition_null(
DEFINE_CONFIG_PARSE_ENUM(config_parse_notify_access, notify_access, NotifyAccess, "Failed to parse notify access specifier");
/* Config parser for ControlGroupAttribute=.
 *
 * rvalue is split into quoted words and must consist of exactly two of them:
 * the attribute name and its value. These are handed to
 * unit_add_cgroup_attribute() with a NULL controller and no map callback
 * (presumably the controller is derived from the attribute name there --
 * confirm against unit_add_cgroup_attribute()).
 *
 * Malformed input and failures to add the attribute are logged and ignored
 * (0 is returned); only running out of memory while splitting yields
 * -ENOMEM. */
int config_parse_unit_cgroup_attr(
                const char *filename,
                unsigned line,
                const char *section,
                const char *lvalue,
                int ltype,
                const char *rvalue,
                void *data,
                void *userdata) {

        Unit *u = data;
        char **words;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        words = strv_split_quoted(rvalue);
        if (!words)
                return -ENOMEM;

        if (strv_length(words) == 2) {
                r = unit_add_cgroup_attribute(u, NULL, words[0], words[1], NULL);
                if (r < 0)
                        log_error("[%s:%u] Failed to add cgroup attribute value, ignoring: %s", filename, line, rvalue);
        } else
                log_error("[%s:%u] Failed to parse cgroup attribute value, ignoring: %s", filename, line, rvalue);

        strv_free(words);
        return 0;
}
/* Config parser for CPUShares=.
 *
 * Parses rvalue as an unsigned long that must be at least 1, formats it back
 * into a decimal string and registers it as the "cpu.shares" attribute of the
 * "cpu" controller on the unit passed in data.
 *
 * Unparsable values and failures to add the attribute are logged and ignored
 * (0 is returned); -ENOMEM is returned only if formatting the value fails. */
int config_parse_unit_cpu_shares(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
        Unit *u = data;
        unsigned long shares;
        char *text;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        r = safe_atolu(rvalue, &shares);
        if (r < 0 || shares < 1) {
                log_error("[%s:%u] Failed to parse CPU shares value, ignoring: %s", filename, line, rvalue);
                return 0;
        }

        if (asprintf(&text, "%lu", shares) < 0)
                return -ENOMEM;

        r = unit_add_cgroup_attribute(u, "cpu", "cpu.shares", text, NULL);
        free(text);

        if (r < 0)
                log_error("[%s:%u] Failed to add cgroup attribute value, ignoring: %s", filename, line, rvalue);

        return 0;
}
/* Config parser shared by MemoryLimit= and MemorySoftLimit=.
 *
 * Parses rvalue with parse_bytes(); the result must be positive. The size is
 * formatted as a decimal string and registered on the "memory" controller as
 * "memory.soft_limit_in_bytes" when lvalue is "MemorySoftLimit", and as
 * "memory.limit_in_bytes" otherwise.
 *
 * Unparsable values and failures to add the attribute are logged and ignored
 * (0 is returned); -ENOMEM is returned only if formatting the value fails. */
int config_parse_unit_memory_limit(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
        Unit *u = data;
        const char *attribute;
        char *text;
        off_t bytes;
        int r;

        assert(filename);
        assert(lvalue);
        assert(rvalue);
        assert(data);

        r = parse_bytes(rvalue, &bytes);
        if (r < 0 || bytes <= 0) {
                log_error("[%s:%u] Failed to parse memory limit value, ignoring: %s", filename, line, rvalue);
                return 0;
        }

        if (asprintf(&text, "%llu", (unsigned long long) bytes) < 0)
                return -ENOMEM;

        /* The same parser serves both settings; the key decides the attribute */
        if (streq(lvalue, "MemorySoftLimit"))
                attribute = "memory.soft_limit_in_bytes";
        else
                attribute = "memory.limit_in_bytes";

        r = unit_add_cgroup_attribute(u, "memory", attribute, text, NULL);
        free(text);

        if (r < 0)
                log_error("[%s:%u] Failed to add cgroup attribute value, ignoring: %s", filename, line, rvalue);

        return 0;
}
/* Map callback that turns a device specification into the string written to
 * the cgroup attribute file.
 *
 * value is split into quoted words: the first is either "*" or a device node
 * path, the optional second is appended verbatim after a space. The result
 * stored in *ret is
 *
 *     "a *:*[ <second>]"              for "*"
 *     "c|b <major>:<minor>[ <second>]"  for a character/block device node
 *
 * controller and name are unused. value must contain at least one word.
 *
 * Returns 0 on success and a newly allocated string in *ret (owned by the
 * caller). Returns -ENOMEM on allocation failure, -ENODEV if the path is
 * neither a character nor a block device, or the negative errno of lstat()
 * if the path cannot be examined. */
static int device_map(const char *controller, const char *name, const char *value, char **ret) {
        struct stat st;
        char **l;
        int r;

        l = strv_split_quoted(value);
        if (!l)
                return -ENOMEM;

        assert(strv_length(l) >= 1);

        if (streq(l[0], "*")) {

                if (asprintf(ret, "a *:*%s%s",
                             isempty(l[1]) ? "" : " ", strempty(l[1])) < 0) {
                        r = -ENOMEM;
                        goto finish;
                }
        } else {

                if (lstat(l[0], &st) < 0) {
                        /* Capture errno right away: the logging and freeing
                         * done before returning may overwrite it. */
                        r = -errno;
                        log_warning("Couldn't stat device %s", l[0]);
                        goto finish;
                }

                if (!S_ISCHR(st.st_mode) && !S_ISBLK(st.st_mode)) {
                        log_warning("%s is not a device.", l[0]);
                        r = -ENODEV;
                        goto finish;
                }

                if (asprintf(ret, "%c %u:%u%s%s",
                             S_ISCHR(st.st_mode) ? 'c' : 'b',
                             major(st.st_rdev), minor(st.st_rdev),
                             isempty(l[1]) ? "" : " ", strempty(l[1])) < 0) {
                        r = -ENOMEM;
                        goto finish;
                }
        }

        r = 0;

finish:
        strv_free(l);
        return r;
}
int config_parse_unit_device_allow(const char *filename, unsigned line, const char *section, const char *lvalue, int ltype, const char *rvalue, void *data, void *userdata) {
Unit *u = data;
char **l;
int r;
unsigned k;
assert(filename);
assert(lvalue);
assert(rvalue);
assert(data);
l = strv_split_quoted(rvalue);
if (!l)
return -ENOMEM;
k = strv_length(l);
if (k < 1 || k > 2) {
log_error("[%s:%u] Failed to parse device value, ignoring: %s", filename, line, rvalue);
strv_free(l);
return 0;
}
if (!streq(l[0], "*") && !path_startswith(l[0], "/dev")) {
log_error("[%s:%u] Device node path not absolute, ignoring: %s", filename, line, rvalue);
strv_free(l);
return 0;
}