Commit 6624768c authored by Lennart Poettering's avatar Lennart Poettering

readahead: add interface to sd-daemon.[ch] to control readahead

parent f0cf061e
...@@ -421,6 +421,7 @@ MANPAGES = \ ...@@ -421,6 +421,7 @@ MANPAGES = \
man/systemd-cgls.1 \ man/systemd-cgls.1 \
man/systemd-notify.1 \ man/systemd-notify.1 \
man/sd_notify.3 \ man/sd_notify.3 \
man/sd_readahead.3 \
man/sd_booted.3 \ man/sd_booted.3 \
man/sd_listen_fds.3 \ man/sd_listen_fds.3 \
man/sd_is_fifo.3 \ man/sd_is_fifo.3 \
......
...@@ -96,6 +96,10 @@ ...@@ -96,6 +96,10 @@
* readahead() vs. fadvise() vs. ioprio * readahead() vs. fadvise() vs. ioprio
* unneeded
* properly handle multiple inotify events per read() in path.c and util.c
External: External:
* place /etc/inittab with explaining blurb. * place /etc/inittab with explaining blurb.
......
<?xml version='1.0'?> <!--*-nxml-*-->
<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook XML V4.2//EN"
"http://www.oasis-open.org/docbook/xml/4.2/docbookx.dtd">
<!--
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
-->
<refentry id="sd_readahead">
<refentryinfo>
<title>sd_readahead</title>
<productname>systemd</productname>
<authorgroup>
<author>
<contrib>Developer</contrib>
<firstname>Lennart</firstname>
<surname>Poettering</surname>
<email>lennart@poettering.net</email>
</author>
</authorgroup>
</refentryinfo>
<refmeta>
<refentrytitle>sd_readahead</refentrytitle>
<manvolnum>3</manvolnum>
</refmeta>
<refnamediv>
<refname>sd_readahead</refname>
<refpurpose>Control ongoing disk read-ahead operations</refpurpose>
</refnamediv>
<refsynopsisdiv>
<funcsynopsis>
<funcsynopsisinfo>#include "sd-daemon.h"</funcsynopsisinfo>
<funcprototype>
<funcdef>int <function>sd_readahead</function></funcdef>
<paramdef>const char *<parameter>action</parameter></paramdef>
</funcprototype>
</funcsynopsis>
</refsynopsisdiv>
<refsect1>
<title>Description</title>
<para><function>sd_readahead()</function> may be
called by programs involved with early boot-up to
control ongoing disk read-ahead operations. It may be
used to terminate read-ahead operations in case an
uncommon disk access pattern is to be expected and
hence read-ahead replay or collection is unlikely to
have the desired speed-up effect on the current or
future boot-ups.</para>
<para>The <parameter>action</parameter> should be one
of the following strings:</para>
<variablelist>
<varlistentry>
<term>cancel</term>
<listitem><para>Terminates read-ahead
data collection, and drops all
read-ahead data collected during this
boot-up.</para></listitem>
</varlistentry>
<varlistentry>
<term>done</term>
<listitem><para>Terminates read-ahead
data collection, but keeps all
read-ahead data collected during this
boot-up around for use during
subsequent boot-ups.</para></listitem>
</varlistentry>
<varlistentry>
<term>noreplay</term>
<listitem><para>Terminates read-ahead
replay.</para></listitem>
</varlistentry>
</variablelist>
</refsect1>
<refsect1>
<title>Return Value</title>
<para>On failure, this call returns a negative
errno-style error code. It is generally recommended to
ignore the return value of this call.</para>
</refsect1>
<refsect1>
<title>Notes</title>
<para>This function is provided by the reference
implementation of APIs for new-style daemons and
distributed with the systemd package. The algorithm
it implements is simple, and can easily be
reimplemented in daemons if it is important to support
this interface without using the reference
implementation.</para>
<para>Internally, this function creates a file in
<filename>/dev/.systemd/readahead/</filename> which is
then used as flag file to notify the read-ahead
subsystem.</para>
<para>For details about the algorithm check the
liberally licensed reference implementation sources:
<ulink url="http://cgit.freedesktop.org/systemd/tree/src/sd-daemon.c"/>
and <ulink
url="http://cgit.freedesktop.org/systemd/tree/src/sd-daemon.h"/>.</para>
<para><function>sd_readahead()</function> is
implemented in the reference implementation's drop-in
<filename>sd-daemon.c</filename> and
<filename>sd-daemon.h</filename> files. It is
recommended that applications consuming this API copy
the implementation into their source tree. For more
details about the reference implementation see
<citerefentry><refentrytitle>sd_daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>.</para>
<para>If -DDISABLE_SYSTEMD is set during compilation
this function will always return 0 and otherwise
become a NOP.</para>
</refsect1>
<refsect1>
<title>Examples</title>
<example>
<title>Cancelling all read-ahead operations</title>
<para>During boots where SELinux has to
relabel the file system hierarchy, it will
create a large number of disk accesses that
are not necessary during normal boots. Hence
it is a good idea to disable both read-ahead replay and read-ahead collection.
</para>
<programlisting>sd_readahead("cancel");
sd_readahead("noreplay");</programlisting>
</example>
</refsect1>
<refsect1>
<title>See Also</title>
<para>
<citerefentry><refentrytitle>systemd</refentrytitle><manvolnum>1</manvolnum></citerefentry>,
<citerefentry><refentrytitle>sd_daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>,
<citerefentry><refentrytitle>daemon</refentrytitle><manvolnum>7</manvolnum></citerefentry>
</para>
</refsect1>
</refentry>
...@@ -147,6 +147,17 @@ ...@@ -147,6 +147,17 @@
semantics of this option see semantics of this option see
<citerefentry><refentrytitle>sd_booted</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem> <citerefentry><refentrytitle>sd_booted</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem>
</varlistentry> </varlistentry>
<varlistentry>
<term><option>--readahead=</option></term>
<listitem><para>Controls disk
read-ahead operations. The argument
must be a string, and either "cancel",
"done" or "noreplay". For details
about the semantics of this option see
<citerefentry><refentrytitle>sd_readahead</refentrytitle><manvolnum>3</manvolnum></citerefentry>.</para></listitem>
</varlistentry>
</variablelist> </variablelist>
</refsect1> </refsect1>
......
...@@ -36,6 +36,7 @@ static bool arg_ready = false; ...@@ -36,6 +36,7 @@ static bool arg_ready = false;
static pid_t arg_pid = 0; static pid_t arg_pid = 0;
static const char *arg_status = NULL; static const char *arg_status = NULL;
static bool arg_booted = false; static bool arg_booted = false;
static const char *arg_readahead = NULL;
static int help(void) { static int help(void) {
...@@ -45,7 +46,8 @@ static int help(void) { ...@@ -45,7 +46,8 @@ static int help(void) {
" --ready Inform the init system about service start-up completion\n" " --ready Inform the init system about service start-up completion\n"
" --pid[=PID] Set main pid of daemon\n" " --pid[=PID] Set main pid of daemon\n"
" --status=TEXT Set status text\n" " --status=TEXT Set status text\n"
" --booted Returns 0 if the system was booted up with systemd, non-zero otherwise\n", " --booted Returns 0 if the system was booted up with systemd, non-zero otherwise\n"
" --readahead=ACTION Controls read-ahead operations\n",
program_invocation_short_name); program_invocation_short_name);
return 0; return 0;
...@@ -57,7 +59,8 @@ static int parse_argv(int argc, char *argv[]) { ...@@ -57,7 +59,8 @@ static int parse_argv(int argc, char *argv[]) {
ARG_READY = 0x100, ARG_READY = 0x100,
ARG_PID, ARG_PID,
ARG_STATUS, ARG_STATUS,
ARG_BOOTED ARG_BOOTED,
ARG_READAHEAD
}; };
static const struct option options[] = { static const struct option options[] = {
...@@ -66,6 +69,7 @@ static int parse_argv(int argc, char *argv[]) { ...@@ -66,6 +69,7 @@ static int parse_argv(int argc, char *argv[]) {
{ "pid", optional_argument, NULL, ARG_PID }, { "pid", optional_argument, NULL, ARG_PID },
{ "status", required_argument, NULL, ARG_STATUS }, { "status", required_argument, NULL, ARG_STATUS },
{ "booted", no_argument, NULL, ARG_BOOTED }, { "booted", no_argument, NULL, ARG_BOOTED },
{ "readahead", required_argument, NULL, ARG_READAHEAD },
{ NULL, 0, NULL, 0 } { NULL, 0, NULL, 0 }
}; };
...@@ -106,6 +110,10 @@ static int parse_argv(int argc, char *argv[]) { ...@@ -106,6 +110,10 @@ static int parse_argv(int argc, char *argv[]) {
arg_booted = true; arg_booted = true;
break; break;
case ARG_READAHEAD:
arg_readahead = optarg;
break;
case '?': case '?':
return -EINVAL; return -EINVAL;
...@@ -119,7 +127,8 @@ static int parse_argv(int argc, char *argv[]) { ...@@ -119,7 +127,8 @@ static int parse_argv(int argc, char *argv[]) {
!arg_ready && !arg_ready &&
!arg_status && !arg_status &&
!arg_pid && !arg_pid &&
!arg_booted) { !arg_booted &&
!arg_readahead) {
help(); help();
return -EINVAL; return -EINVAL;
} }
...@@ -144,6 +153,13 @@ int main(int argc, char* argv[]) { ...@@ -144,6 +153,13 @@ int main(int argc, char* argv[]) {
if (arg_booted) if (arg_booted)
return sd_booted() <= 0; return sd_booted() <= 0;
if (arg_readahead) {
if ((r = sd_readahead(arg_readahead)) < 0) {
log_error("Failed to issue read-ahead control command: %s", strerror(-r));
goto finish;
}
}
if (arg_ready) if (arg_ready)
our_env[i++] = (char*) "READY=1"; our_env[i++] = (char*) "READY=1";
......
...@@ -41,6 +41,7 @@ ...@@ -41,6 +41,7 @@
#include <sys/ioctl.h> #include <sys/ioctl.h>
#include <sys/vfs.h> #include <sys/vfs.h>
#include <getopt.h> #include <getopt.h>
#include <sys/inotify.h>
#include "missing.h" #include "missing.h"
#include "util.h" #include "util.h"
...@@ -56,6 +57,7 @@ ...@@ -56,6 +57,7 @@
* - sd_readahead_cancel * - sd_readahead_cancel
* - gzip? * - gzip?
* - remount rw? * - remount rw?
* - handle files where nothing is in mincore
* - does ioprio_set work with fadvise()? * - does ioprio_set work with fadvise()?
*/ */
...@@ -199,12 +201,13 @@ static int qsort_compare(const void *a, const void *b) { ...@@ -199,12 +201,13 @@ static int qsort_compare(const void *a, const void *b) {
static int collect(const char *root) { static int collect(const char *root) {
enum { enum {
FD_FANOTIFY, FD_FANOTIFY, /* Get the actual fs events */
FD_SIGNAL, FD_SIGNAL,
FD_INOTIFY, /* We get notifications to quit early via this fd */
_FD_MAX _FD_MAX
}; };
struct pollfd pollfd[_FD_MAX]; struct pollfd pollfd[_FD_MAX];
int fanotify_fd = -1, signal_fd = -1, r = 0; int fanotify_fd = -1, signal_fd = -1, inotify_fd = -1, r = 0;
pid_t my_pid; pid_t my_pid;
Hashmap *files = NULL; Hashmap *files = NULL;
Iterator i; Iterator i;
...@@ -251,6 +254,11 @@ static int collect(const char *root) { ...@@ -251,6 +254,11 @@ static int collect(const char *root) {
goto finish; goto finish;
} }
if ((inotify_fd = open_inotify()) < 0) {
r = inotify_fd;
goto finish;
}
not_after = now(CLOCK_MONOTONIC) + arg_timeout; not_after = now(CLOCK_MONOTONIC) + arg_timeout;
my_pid = getpid(); my_pid = getpid();
...@@ -260,6 +268,8 @@ static int collect(const char *root) { ...@@ -260,6 +268,8 @@ static int collect(const char *root) {
pollfd[FD_FANOTIFY].events = POLLIN; pollfd[FD_FANOTIFY].events = POLLIN;
pollfd[FD_SIGNAL].fd = signal_fd; pollfd[FD_SIGNAL].fd = signal_fd;
pollfd[FD_SIGNAL].events = POLLIN; pollfd[FD_SIGNAL].events = POLLIN;
pollfd[FD_INOTIFY].fd = inotify_fd;
pollfd[FD_INOTIFY].events = POLLIN;
sd_notify(0, sd_notify(0,
"READY=1\n" "READY=1\n"
...@@ -267,6 +277,17 @@ static int collect(const char *root) { ...@@ -267,6 +277,17 @@ static int collect(const char *root) {
log_debug("Collecting..."); log_debug("Collecting...");
if (access("/dev/.systemd/readahead/cancel", F_OK) >= 0) {
log_debug("Collection canceled");
r = -ECANCELED;
goto finish;
}
if (access("/dev/.systemd/readahead/done", F_OK) >= 0) {
log_debug("Got termination request");
goto done;
}
for (;;) { for (;;) {
union { union {
struct fanotify_event_metadata metadata; struct fanotify_event_metadata metadata;
...@@ -298,14 +319,52 @@ static int collect(const char *root) { ...@@ -298,14 +319,52 @@ static int collect(const char *root) {
goto finish; goto finish;
} }
if (pollfd[FD_SIGNAL].revents != 0)
break;
if (h == 0) { if (h == 0) {
log_debug("Reached maximum collection time, ending collection."); log_debug("Reached maximum collection time, ending collection.");
break; break;
} }
if (pollfd[FD_SIGNAL].revents) {
log_debug("Got signal.");
break;
}
if (pollfd[FD_INOTIFY].revents) {
uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
struct inotify_event *e;
if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
if (errno == EINTR || errno == EAGAIN)
continue;
log_error("Failed to read inotify event: %m");
r = -errno;
goto finish;
}
e = (struct inotify_event*) inotify_buffer;
while (n > 0) {
size_t step;
if ((e->mask & IN_CREATE) && streq(e->name, "cancel")) {
log_debug("Collection canceled");
r = -ECANCELED;
goto finish;
}
if ((e->mask & IN_CREATE) && streq(e->name, "done")) {
log_debug("Got termination request");
goto done;
}
step = sizeof(struct inotify_event) + e->len;
assert(step <= (size_t) n);
e = (struct inotify_event*) ((uint8_t*) e + step);
n -= step;
}
}
if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) { if ((n = read(fanotify_fd, &data, sizeof(data))) < 0) {
if (errno == EINTR || errno == EAGAIN) if (errno == EINTR || errno == EAGAIN)
...@@ -352,6 +411,7 @@ static int collect(const char *root) { ...@@ -352,6 +411,7 @@ static int collect(const char *root) {
} }
} }
done:
if (fanotify_fd >= 0) { if (fanotify_fd >= 0) {
close_nointr_nofail(fanotify_fd); close_nointr_nofail(fanotify_fd);
fanotify_fd = -1; fanotify_fd = -1;
...@@ -451,6 +511,9 @@ finish: ...@@ -451,6 +511,9 @@ finish:
if (signal_fd >= 0) if (signal_fd >= 0)
close_nointr_nofail(signal_fd); close_nointr_nofail(signal_fd);
if (inotify_fd >= 0)
close_nointr_nofail(inotify_fd);
if (pack) { if (pack) {
fclose(pack); fclose(pack);
unlink(pack_fn_new); unlink(pack_fn_new);
......
...@@ -24,6 +24,7 @@ ...@@ -24,6 +24,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <sys/sysinfo.h> #include <sys/sysinfo.h>
#include <sys/inotify.h>
#include "log.h" #include "log.h"
#include "readahead-common.h" #include "readahead-common.h"
...@@ -116,3 +117,23 @@ bool enough_ram(void) { ...@@ -116,3 +117,23 @@ bool enough_ram(void) {
* with at least 128MB * with at least 128MB
* memory */ * memory */
} }
/* Creates a non-blocking, close-on-exec inotify descriptor that watches
 * /dev/.systemd/readahead for newly created entries (IN_CREATE). The
 * directory is where the read-ahead control flag files are looked up.
 *
 * Returns the inotify file descriptor (>= 0) on success, or a negative
 * errno-style error code on failure. On failure no descriptor is leaked. */
int open_inotify(void) {
        int fd, r;

        if ((fd = inotify_init1(IN_CLOEXEC|IN_NONBLOCK)) < 0) {
                /* Save errno before logging: the logging call may
                 * overwrite it. */
                r = -errno;
                log_error("Failed to create inotify handle: %m");
                return r;
        }

        /* Best effort: the directories may already exist, in which case
         * mkdir() fails harmlessly. A real problem surfaces below when
         * the watch cannot be installed. */
        mkdir("/dev/.systemd", 0755);
        mkdir("/dev/.systemd/readahead", 0755);

        if (inotify_add_watch(fd, "/dev/.systemd/readahead", IN_CREATE) < 0) {
                /* Save errno first: both logging and closing the fd may
                 * clobber it, which would make us return the wrong code
                 * (possibly even a non-negative value). */
                r = -errno;
                log_error("Failed to watch /dev/.systemd/readahead: %m");
                close_nointr_nofail(fd);
                return r;
        }

        return fd;
}
...@@ -32,4 +32,6 @@ int fs_on_ssd(const char *p); ...@@ -32,4 +32,6 @@ int fs_on_ssd(const char *p);
bool enough_ram(void); bool enough_ram(void);
int open_inotify(void);
#endif #endif
...@@ -33,6 +33,7 @@ ...@@ -33,6 +33,7 @@
#include <sys/stat.h> #include <sys/stat.h>
#include <unistd.h> #include <unistd.h>
#include <getopt.h> #include <getopt.h>
#include <sys/inotify.h>
#include "missing.h" #include "missing.h"
#include "util.h" #include "util.h"
...@@ -119,6 +120,7 @@ static int replay(const char *root) { ...@@ -119,6 +120,7 @@ static int replay(const char *root) {
char *pack_fn = NULL, c; char *pack_fn = NULL, c;
bool on_ssd, ready = false; bool on_ssd, ready = false;
int prio; int prio;
int inotify_fd = -1;
assert(root); assert(root);
...@@ -141,6 +143,11 @@ static int replay(const char *root) { ...@@ -141,6 +143,11 @@ static int replay(const char *root) {
goto finish; goto finish;
} }
if ((inotify_fd = open_inotify()) < 0) {
r = inotify_fd;
goto finish;
}
if (!(fgets(line, sizeof(line), pack))) { if (!(fgets(line, sizeof(line), pack))) {
log_error("Premature end of pack file."); log_error("Premature end of pack file.");
r = -EIO; r = -EIO;
...@@ -177,8 +184,40 @@ static int replay(const char *root) { ...@@ -177,8 +184,40 @@ static int replay(const char *root) {
log_debug("Replaying..."); log_debug("Replaying...");
if (access("/dev/.systemd/readahead/noreplay", F_OK) >= 0) {
log_debug("Got termination request");
goto done;
}
while (!feof(pack) && !ferror(pack)) { while (!feof(pack) && !ferror(pack)) {
uint8_t inotify_buffer[sizeof(struct inotify_event) + FILENAME_MAX];
int k; int k;
ssize_t n;
if ((n = read(inotify_fd, &inotify_buffer, sizeof(inotify_buffer))) < 0) {
if (errno != EINTR && errno != EAGAIN) {
log_error("Failed to read inotify event: %m");
r = -errno;
goto finish;
}
} else {
struct inotify_event