Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
steam
systemd
Commits
22be093f
Commit
22be093f
authored
Sep 23, 2010
by
Lennart Poettering
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
readahead: implement minimal readahead logic based on fanotify(), mincore() and readahead()
parent
647703fe
Changes
12
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
966 additions
and
1 deletion
+966
-1
.gitignore
.gitignore
+2
-0
Makefile.am
Makefile.am
+27
-1
configure.ac
configure.ac
+1
-0
src/hashmap.c
src/hashmap.c
+15
-0
src/hashmap.h
src/hashmap.h
+1
-0
src/linux/fanotify.h
src/linux/fanotify.h
+98
-0
src/macro.h
src/macro.h
+6
-0
src/missing.h
src/missing.h
+25
-0
src/readahead-collect.c
src/readahead-collect.c
+437
-0
src/readahead-common.c
src/readahead-common.c
+107
-0
src/readahead-common.h
src/readahead-common.h
+33
-0
src/readahead-replay.c
src/readahead-replay.c
+214
-0
No files found.
.gitignore
View file @
22be093f
systemd-readahead-collect
systemd-readahead-replay
systemd-reply-password
systemd-ask-password-agent
systemd-ask-password
...
...
Makefile.am
View file @
22be093f
...
...
@@ -92,7 +92,9 @@ rootlibexec_PROGRAMS = \
systemd-remount-api-vfs
\
systemd-kmsg-syslogd
\
systemd-vconsole-setup
\
systemd-reply-password
systemd-reply-password
\
systemd-readahead-collect
\
systemd-readahead-replay
noinst_PROGRAMS
=
\
test-engine
\
...
...
@@ -699,6 +701,30 @@ systemd_reply_password_SOURCES = \
systemd_reply_password_LDADD
=
\
libsystemd-basic.la
systemd_readahead_collect_SOURCES
=
\
src/readahead-collect.c
\
src/sd-daemon.c
\
src/readahead-common.c
systemd_readahead_collect_CFLAGS
=
\
$(UDEV_CFLAGS)
systemd_readahead_collect_LDADD
=
\
libsystemd-basic.la
\
$(UDEV_LIBS)
systemd_readahead_replay_SOURCES
=
\
src/readahead-replay.c
\
src/sd-daemon.c
\
src/readahead-common.c
systemd_readahead_replay_CFLAGS
=
\
$(UDEV_CFLAGS)
systemd_readahead_replay_LDADD
=
\
libsystemd-basic.la
\
$(UDEV_LIBS)
systemd_cgls_SOURCES
=
\
src/cgls.c
\
src/cgroup-show.c
\
...
...
configure.ac
View file @
22be093f
...
...
@@ -27,6 +27,7 @@ AM_INIT_AUTOMAKE([foreign 1.11 -Wall -Wno-portability silent-rules tar-pax subdi
AC_SUBST(PACKAGE_URL, [http://www.freedesktop.org/wiki/Software/systemd])
AC_CANONICAL_HOST
AC_DEFINE_UNQUOTED([CANONICAL_HOST], "$host", [Canonical host string.])
AM_SILENT_RULES([yes])
...
...
src/hashmap.c
View file @
22be093f
...
...
@@ -476,6 +476,21 @@ void* hashmap_steal_first(Hashmap *h) {
return
data
;
}
/* Removes the entry at the head of the map's iteration list and returns
 * that entry's key. Returns NULL when h is NULL or when the iteration
 * list is empty. The value stored under the key is not returned. */
void *hashmap_steal_first_key(Hashmap *h) {
        void *stolen;

        /* Nothing to steal from a missing or empty map. */
        if (!h || !h->iterate_list_head)
                return NULL;

        /* Pick up the key before remove_entry() is called on the entry. */
        stolen = (void*) h->iterate_list_head->key;
        remove_entry(h, h->iterate_list_head);

        return stolen;
}
unsigned
hashmap_size
(
Hashmap
*
h
)
{
if
(
!
h
)
...
...
src/hashmap.h
View file @
22be093f
...
...
@@ -72,6 +72,7 @@ void *hashmap_iterate_skip(Hashmap *h, const void *key, Iterator *i);
void
hashmap_clear
(
Hashmap
*
h
);
void
*
hashmap_steal_first
(
Hashmap
*
h
);
void
*
hashmap_steal_first_key
(
Hashmap
*
h
);
void
*
hashmap_first
(
Hashmap
*
h
);
void
*
hashmap_last
(
Hashmap
*
h
);
...
...
src/linux/fanotify.h
0 → 100644
View file @
22be093f
#ifndef _LINUX_FANOTIFY_H
#define _LINUX_FANOTIFY_H
#include <linux/types.h>
/* the following events that user-space can register for */
#define FAN_ACCESS 0x00000001
/* File was accessed */
#define FAN_MODIFY 0x00000002
/* File was modified */
#define FAN_CLOSE_WRITE 0x00000008
/* Writable file closed */
#define FAN_CLOSE_NOWRITE 0x00000010
/* Non-writable file closed */
#define FAN_OPEN 0x00000020
/* File was opened */
#define FAN_EVENT_ON_CHILD 0x08000000
/* interested in child events */
/* FIXME currently Q's have no limit.... */
#define FAN_Q_OVERFLOW 0x00004000
/* Event queue overflowed */
#define FAN_OPEN_PERM 0x00010000
/* File open in perm check */
#define FAN_ACCESS_PERM 0x00020000
/* File accessed in perm check */
/* helper events */
#define FAN_CLOSE (FAN_CLOSE_WRITE | FAN_CLOSE_NOWRITE)
/* close */
/* flags used for fanotify_init() */
#define FAN_CLOEXEC 0x00000001
#define FAN_NONBLOCK 0x00000002
#define FAN_ALL_INIT_FLAGS (FAN_CLOEXEC | FAN_NONBLOCK)
/* flags used for fanotify_modify_mark() */
#define FAN_MARK_ADD 0x00000001
#define FAN_MARK_REMOVE 0x00000002
#define FAN_MARK_DONT_FOLLOW 0x00000004
#define FAN_MARK_ONLYDIR 0x00000008
#define FAN_MARK_MOUNT 0x00000010
#define FAN_MARK_IGNORED_MASK 0x00000020
#define FAN_MARK_IGNORED_SURV_MODIFY 0x00000040
#define FAN_MARK_FLUSH 0x00000080
#define FAN_ALL_MARK_FLAGS (FAN_MARK_ADD |\
FAN_MARK_REMOVE |\
FAN_MARK_DONT_FOLLOW |\
FAN_MARK_ONLYDIR |\
FAN_MARK_MOUNT |\
FAN_MARK_IGNORED_MASK |\
FAN_MARK_IGNORED_SURV_MODIFY)
/*
* All of the events - we build the list by hand so that we can add flags in
* the future and not break backward compatibility. Apps will get only the
* events that they originally wanted. Be sure to add new events here!
*/
#define FAN_ALL_EVENTS (FAN_ACCESS |\
FAN_MODIFY |\
FAN_CLOSE |\
FAN_OPEN)
/*
* All events which require a permission response from userspace
*/
#define FAN_ALL_PERM_EVENTS (FAN_OPEN_PERM |\
FAN_ACCESS_PERM)
#define FAN_ALL_OUTGOING_EVENTS (FAN_ALL_EVENTS |\
FAN_ALL_PERM_EVENTS |\
FAN_Q_OVERFLOW)
#define FANOTIFY_METADATA_VERSION 2
/* Header of one event record as read from a fanotify file descriptor.
 * Packed, so the layout is exactly the field sizes back to back. */
struct fanotify_event_metadata {
        __u32 event_len;        /* length of this record; FAN_EVENT_NEXT() advances by it */
        __u32 vers;             /* presumably matched against FANOTIFY_METADATA_VERSION -- confirm */
        __u64 mask;             /* presumably the FAN_* event bits -- confirm */
        __s32 fd;               /* file descriptor delivered with the event */
        __s32 pid;              /* process id delivered with the event */
} __attribute__ ((packed));
/* Reply record for a permission (_PERM) event; packed to 8 bytes. */
struct fanotify_response {
        __s32 fd;               /* descriptor the reply refers to */
        __u32 response;         /* presumably FAN_ALLOW or FAN_DENY -- confirm */
} __attribute__ ((packed));
/* Legit userspace responses to a _PERM event */
#define FAN_ALLOW 0x01
#define FAN_DENY 0x02
/* Helper functions to deal with fanotify_event_metadata buffers */
#define FAN_EVENT_METADATA_LEN (sizeof(struct fanotify_event_metadata))
#define FAN_EVENT_NEXT(meta, len) ((len) -= (meta)->event_len, \
(struct fanotify_event_metadata*)(((char *)(meta)) + \
(meta)->event_len))
#define FAN_EVENT_OK(meta, len) ((long)(len) >= (long)FAN_EVENT_METADATA_LEN && \
(long)(meta)->event_len >= (long)FAN_EVENT_METADATA_LEN && \
(long)(meta)->event_len <= (long)(len))
#endif
/* _LINUX_FANOTIFY_H */
src/macro.h
View file @
22be093f
...
...
@@ -27,6 +27,8 @@
#include <sys/uio.h>
#include <inttypes.h>
#define PAGE_SIZE 4096
#define _printf_attr_(a,b) __attribute__ ((format (printf, a, b)))
#define _sentinel_ __attribute__ ((sentinel))
#define _noreturn_ __attribute__((noreturn))
...
...
@@ -49,6 +51,10 @@ static inline size_t ALIGN(size_t l) {
return
((
l
+
sizeof
(
void
*
)
-
1
)
&
~
(
sizeof
(
void
*
)
-
1
));
}
/* Rounds l up to the next multiple of PAGE_SIZE. A value that already is
 * a multiple of PAGE_SIZE is returned unchanged. */
static inline size_t PAGE_ALIGN(size_t l) {
        const size_t bumped = l + PAGE_SIZE - 1;

        /* Clearing the low bits truncates back down to a page boundary. */
        return bumped & ~(PAGE_SIZE - 1);
}
#define ELEMENTSOF(x) (sizeof(x)/sizeof((x)[0]))
#define MAX(a,b) \
...
...
src/missing.h
View file @
22be093f
...
...
@@ -76,4 +76,29 @@ static inline int pivot_root(const char *new_root, const char *put_old) {
return
syscall
(
SYS_pivot_root
,
new_root
,
put_old
);
}
/* Fallback syscall numbers for fanotify_init() and fanotify_mark().
 * Each one is defined only if it is not already defined. */
#ifdef __x86_64__
#ifndef __NR_fanotify_init
#define __NR_fanotify_init 300
#endif
#ifndef __NR_fanotify_mark
#define __NR_fanotify_mark 301
#endif
#else
/* NOTE(review): every architecture other than x86-64 lands in this
 * branch. The values look like the i386 numbers -- confirm before
 * building for any other architecture. */
#ifndef __NR_fanotify_init
#define __NR_fanotify_init 338
#endif
#ifndef __NR_fanotify_mark
#define __NR_fanotify_mark 339
#endif
#endif
/* Invokes the fanotify_init system call directly through syscall().
 * Both arguments are forwarded unchanged; the syscall() result is
 * returned converted to int. */
static inline int fanotify_init(unsigned int flags, unsigned int event_f_flags) {
        const long rc = syscall(__NR_fanotify_init, flags, event_f_flags);

        return (int) rc;
}
/* Invokes the fanotify_mark system call directly through syscall().
 * All five arguments are forwarded unchanged; the syscall() result is
 * returned converted to int.
 *
 * NOTE(review): mask is handed to the variadic syscall() as one 64-bit
 * argument. On 32-bit ABIs confirm that this matches how the kernel
 * expects the value to be split. */
static inline int fanotify_mark(int fanotify_fd, unsigned int flags, __u64 mask,
                                int dfd, const char *pathname) {
        const long rc = syscall(__NR_fanotify_mark, fanotify_fd, flags, mask,
                                dfd, pathname);

        return (int) rc;
}
#endif
src/readahead-collect.c
0 → 100644
View file @
22be093f
/*-*- Mode: C; c-basic-offset: 8; indent-tabs-mode: nil -*-*/
/***
This file is part of systemd.
Copyright 2010 Lennart Poettering
systemd is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
systemd is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with systemd; If not, see <http://www.gnu.org/licenses/>.
***/
#include <errno.h>
#include <inttypes.h>
#include <fcntl.h>
#include <linux/limits.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/select.h>
#include <sys/time.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <linux/fanotify.h>
#include <sys/signalfd.h>
#include <sys/poll.h>
#include <sys/mman.h>
#include <linux/fs.h>
#include <linux/fiemap.h>
#include <sys/ioctl.h>
#include "missing.h"
#include "util.h"
#include "set.h"
#include "sd-daemon.h"
#include "ioprio.h"
#include "readahead-common.h"
/*
fixme:
- BTRFS_IOC_DEFRAG
*/
#define MINCORE_VEC_SIZE (READAHEAD_FILE_SIZE_MAX/PAGE_SIZE)
/*
 * Appends the readahead record for the file fn to the pack stream.
 *
 * Record layout, as written below:
 *   - fn followed by '\n';
 *   - for every run of pages that mincore() reports as resident, a pair
 *     of uint32_t written raw with fwrite(): the index of the first page
 *     of the run and the index of the first page after it. A run that
 *     starts at page 0 and reaches the end of the file is not written
 *     (no range data means "read the entire file");
 *   - a pair of zero uint32_t values as end marker.
 *
 * Returns 0 on success, the result of file_verify() when that is <= 0,
 * and -errno when open(), mmap() or mincore() fails. The return values of
 * the stdio writes to pack are not checked here; the stream's error
 * indicator is left for the caller to inspect.
 */
static int pack_file(FILE *pack, const char *fn) {
        struct stat st;
        void *start = MAP_FAILED;
        /* NOTE(review): relies on file_verify() rejecting files larger than
         * READAHEAD_FILE_SIZE_MAX, otherwise mincore() would write past the
         * end of vec -- confirm against file_verify(). */
        uint8_t vec[MINCORE_VEC_SIZE];
        uint32_t b = 0, c;
        size_t l = 0, pages;
        bool mapped;
        int r = 0, fd = -1, k;

        assert(pack);
        assert(fn);

        if ((fd = open(fn, O_RDONLY|O_CLOEXEC|O_NOATIME|O_NOCTTY|O_NOFOLLOW)) < 0) {
                /* Capture errno before logging: the log call may overwrite
                 * it. %m below still sees the original value. */
                r = -errno;
                log_warning("open(%s) failed: %m", fn);
                goto finish;
        }

        if ((k = file_verify(fd, fn, &st)) <= 0) {
                r = k;
                goto finish;
        }

        l = PAGE_ALIGN(st.st_size);
        if ((start = mmap(NULL, l, PROT_READ, MAP_SHARED, fd, 0)) == MAP_FAILED) {
                r = -errno;
                log_warning("mmap(%s) failed: %m", fn);
                goto finish;
        }

        if (mincore(start, l, vec) < 0) {
                r = -errno;
                log_warning("mincore(%s) failed: %m", fn);
                goto finish;
        }

        fputs(fn, pack);
        fputc('\n', pack);

        pages = l / PAGE_SIZE;
        mapped = false;
        for (c = 0; c < pages; c++) {
                /* Bit 0 of each mincore() byte says whether the page is resident. */
                bool new_mapped = (vec[c] & 1);

                if (!mapped && new_mapped)
                        /* A resident run starts here. */
                        b = c;
                else if (mapped && !new_mapped) {
                        /* The run [b, c) just ended; emit it. */
                        fwrite(&b, sizeof(b), 1, pack);
                        fwrite(&c, sizeof(c), 1, pack);

                        log_debug("%s: page %u to %u", fn, b, c);
                }

                mapped = new_mapped;
        }

        /* We don't write any range data if we should read the entire file */
        if (mapped && b > 0) {
                fwrite(&b, sizeof(b), 1, pack);
                fwrite(&c, sizeof(c), 1, pack);

                log_debug("%s: page %u to %u", fn, b, c);
        }

        /* End marker */
        b = 0;
        fwrite(&b, sizeof(b), 1, pack);
        fwrite(&b, sizeof(b), 1, pack);

finish:
        if (start != MAP_FAILED)
                munmap(start, l);

        if (fd >= 0)
                close_nointr_nofail(fd);

        return r;
}
/* Asks the file system, via the FS_IOC_FIEMAP ioctl, for the first extent
 * of the file open on fd and returns that extent's fe_physical value
 * converted to unsigned long.
 *
 * Returns 0 when the ioctl fails, when no extent is reported as mapped,
 * or when the extent carries FIEMAP_EXTENT_UNKNOWN. */
static unsigned long fd_first_block(int fd) {
        /* struct fiemap followed directly by room for a single extent, so
         * that fm_extents[0] has storage behind it. */
        struct {
                struct fiemap fiemap;
                struct fiemap_extent extent;
        } query;

        zero(query);
        query.fiemap.fm_length = ~0ULL;
        query.fiemap.fm_extent_count = 1;

        /* The checks run in this order and stop at the first one that hits. */
        if (ioctl(fd, FS_IOC_FIEMAP, &query) < 0 ||
            query.fiemap.fm_mapped_extents <= 0 ||
            (query.fiemap.fm_extents[0].fe_flags & FIEMAP_EXTENT_UNKNOWN))
                return 0;

        return (unsigned long) query.fiemap.fm_extents[0].fe_physical;
}
/* One collected file: its path and the block value used as sort key. */
struct item {
        const char *path;
        unsigned long block;
};

/* qsort() comparator for struct item: orders by ascending block and
 * breaks ties with strcmp() on the paths. */
static int qsort_compare(const void *a, const void *b) {
        const struct item *lhs = a, *rhs = b;

        if (lhs->block != rhs->block)
                return lhs->block < rhs->block ? -1 : 1;

        return strcmp(lhs->path, rhs->path);
}
static
int
collect
(
const
char
*
root
)
{
enum
{
FD_FANOTIFY
,
FD_SIGNAL
,
_FD_MAX
};
struct
pollfd
pollfd
[
_FD_MAX
];
int
fanotify_fd
=
-
1
,
signal_fd
=
-
1
,
r
=
0
;
pid_t
my_pid
;
Hashmap
*
files
=
NULL
;
Iterator
i
;
char
*
p
,
*
q
;
sigset_t
mask
;
FILE
*
pack
=
NULL
;
char
*
pack_fn_new
=
NULL
,
*
pack_fn
=
NULL
;
bool
on_ssd
;
assert
(
root
);
if
(
ioprio_set
(
IOPRIO_WHO_PROCESS
,
getpid
(),
IOPRIO_PRIO_VALUE
(
IOPRIO_CLASS_IDLE
,
0
))
<
0
)
log_warning
(
"Failed to set IDLE IO priority class: %m"
);
assert_se
(
sigemptyset
(
&
mask
)
==
0
);
sigset_add_many
(
&
mask
,
SIGINT
,
SIGTERM
,
-
1
);
assert_se
(
sigprocmask
(
SIG_SETMASK
,
&
mask
,
NULL
)
==
0
);
if
((
signal_fd
=
signalfd
(
-
1
,
&
mask
,
SFD_NONBLOCK
|
SFD_CLOEXEC
))
<
0
)
{
log_error
(
"signalfd(): %m"
);
r
=
-
errno
;
goto
finish
;
}
if
(
!
(
files
=
hashmap_new
(
string_hash_func
,
string_compare_func
)))
{
log_error
(
"Failed to allocate set."
);
r
=
-
ENOMEM
;
goto
finish
;
}
if
((
fanotify_fd
=
fanotify_init
(
FAN_CLOEXEC
,
O_RDONLY
|
O_LARGEFILE
|
O_CLOEXEC
|
O_NOATIME
))
<
0
)
{
log_error
(
"Failed to create fanotify object: %m"
);
r
=
-
errno
;
goto
finish
;
}
if
(
fanotify_mark
(
fanotify_fd
,
FAN_MARK_ADD
|
FAN_MARK_MOUNT
,
FAN_OPEN
,
AT_FDCWD
,
root
)
<
0
)
{
log_error
(
"Failed to mark %s: %m"
,
root
);
r
=
-
errno
;
goto
finish
;
}
my_pid
=
getpid
();
zero
(
pollfd
);
pollfd
[
FD_FANOTIFY
].
fd
=
fanotify_fd
;
pollfd
[
FD_FANOTIFY
].
events
=
POLLIN
;
pollfd
[
FD_SIGNAL
].
fd
=
signal_fd
;
pollfd
[
FD_SIGNAL
].
events
=
POLLIN
;
sd_notify
(
0
,
"READY=1
\n
"
"STATUS=Collecting readahead data"
);
log_debug
(
"Collecting..."
);
for
(;;)
{
union
{
struct
fanotify_event_metadata
metadata
;
char
buffer
[
4096
];
}
data
;
ssize_t
n
;
struct
fanotify_event_metadata
*
m
;
if
(
poll
(
pollfd
,
_FD_MAX
,
-
1
)
<
0
)
{
if
(
errno
==
EINTR
)
continue
;
log_error
(
"poll(): %m"
);
r
=
-
errno
;
goto
finish
;
}
if
(
pollfd
[
FD_SIGNAL
].
revents
!=
0
)
break
;
if
((
n
=
read
(
fanotify_fd
,
&
data
,
sizeof
(
data
)))
<
0
)
{
if
(
errno
==
EINTR
||
errno
==
EAGAIN
)
continue
;
log_error
(
"Failed to read event: %m"
);
r
=
-
errno
;
goto
finish
;
}
m
=
&
data
.
metadata
;
while
(
FAN_EVENT_OK
(
m
,
n
))
{
if
(
m
->
pid
!=
my_pid
&&
m
->
fd
>=
0
)
{
char
fn
[
PATH_MAX
];
int
k
;
snprintf
(
fn
,
sizeof
(
fn
),
"/proc/self/fd/%i"
,
m
->
fd
);
char_array_0
(
fn
);
if
((
k
=
readlink_malloc
(
fn
,
&
p
))
>=
0
)
{
if
(
hashmap_get
(
files
,
p
))
/* Already read */
free
(
p
);
else
{
unsigned
long
ul
;
ul
=
fd_first_block
(
m
->
fd
);
if
((
k
=
hashmap_put
(
files
,
p
,
ULONG_TO_PTR
(
ul
)))
<
0
)
{
if
(
k
!=
-
EEXIST
)
log_warning
(
"set_put() failed: %s"
,
strerror
(
-
k
));
free
(
p
);
}
}
}
else
log_warning
(
"readlink(%s) failed: %s"
,
fn
,
strerror
(
-
k
));
}
if
(
m
->
fd
)
close_nointr_nofail
(
m
->
fd
);
m
=
FAN_EVENT_NEXT
(
m
,
n
);
}
}
if
(
fanotify_fd
>=
0
)
{
close_nointr_nofail
(
fanotify_fd
);
fanotify_fd
=
-
1
;
}
log_debug
(
"Writing Pack File..."
);
on_ssd
=
fs_on_ssd
(
root
);
log_debug
(
"On SSD: %s"
,
yes_no
(
on_ssd
));
asprintf
(
&
pack_fn
,
"%s/.readahead"
,
root
);
asprintf
(
&
pack_fn_new
,
"%s/.readahead.new"
,
root
);
if
(
!
pack_fn
||
!
pack_fn_new
)
{
log_error
(
"Out of memory"
);
r
=
-
ENOMEM
;
goto
finish
;
}
if
(
!
(
pack
=
fopen
(
pack_fn_new
,
"we"
)))
{
log_error
(
"Failed to open pack file: %m"
);
r
=
-
errno
;
goto
finish
;
}
fputs
(
CANONICAL_HOST
"
\n
"
,
pack
);
putc
(
on_ssd
?
'S'
:
'R'
,
pack
);
if
(
on_ssd
)
{
/* On SSD, just write things out in the order the
* files where accessed */
HASHMAP_FOREACH_KEY
(
q
,
p
,
files
,
i
)
pack_file
(
pack
,
p
);
}
else
{
struct
item
*
ordered
,
*
j
;
unsigned
k
,
n
;
/* On rotating media, order things by the block
* numbers */
log_debug
(
"Ordering..."
);
n
=
hashmap_size
(
files
);
if
(
!
(
ordered
=
new
(
struct
item
,
n
)))
{
log_error
(
"Out of memory"
);
r
=
-
ENOMEM
;
goto
finish
;
}
j
=
ordered
;
HASHMAP_FOREACH_KEY
(
q
,
p
,
files
,
i
)
{
j
->
path
=
p
;
j
->
block
=
PTR_TO_ULONG
(
q
);
j
++
;
}
assert
(
ordered
+
n
==
j
);
qsort
(
ordered
,
n
,
sizeof
(
struct
item
),
qsort_compare
);
for
(
k
=
0
;
k
<
n
;
k
++
)
pack_file
(
pack
,
ordered
[
k
].
path
);
free
(
ordered
);
}
log_debug
(
"Finalizing..."
);
fflush
(
pack
);
if
(
ferror
(
pack
))
{
log_error
(
"Failed to write pack file."
);
r
=
-
EIO
;
goto
finish
;
}
if
(
rename
(
pack_fn_new
,
pack_fn
)
<
0
)
{
log_error
(
"Failed to rename readahead file: %m"
);
r
=
-
errno
;