Commit 481eaec3 authored by Michael S. Tsirkin

tools/virtio: add ringtest utilities

This adds micro-benchmarks useful for tuning virtio ring layouts.
Three layouts are currently implemented:

- virtio 0.9 compatible one
- an experimental extension bypassing the ring index, polling ring
  itself instead
- an experimental extension bypassing avail and used ring completely

Typical use:

sh run-on-all.sh perf stat -r 10 --log-fd 1 -- ./ring

It doesn't depend on the kernel directly, but it's handy
to have as much virtio stuff as possible in one tree.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
parent fb9b050c
# Build the virtio ring-layout micro-benchmarks.
# The bare "all:" line makes "all" the default goal; its prerequisites
# are listed on the following line.
all:
all: ring virtio_ring_0_9 virtio_ring_poll
CFLAGS += -Wall
CFLAGS += -pthread -O2 -ggdb
LDFLAGS += -pthread -O2 -ggdb
# Every benchmark shares the common harness in main.c/main.h.
main.o: main.c main.h
ring.o: ring.c main.h
virtio_ring_0_9.o: virtio_ring_0_9.c main.h
# NOTE(review): virtio_ring_poll.o also depends on virtio_ring_0_9.c —
# presumably it #includes that source; verify in virtio_ring_poll.c.
virtio_ring_poll.o: virtio_ring_poll.c virtio_ring_0_9.c main.h
ring: ring.o main.o
virtio_ring_0_9: virtio_ring_0_9.o main.o
virtio_ring_poll: virtio_ring_poll.o main.o
# Leading "-" tells make to ignore rm failures for not-yet-built files.
clean:
-rm main.o
-rm ring.o ring
-rm virtio_ring_0_9.o virtio_ring_0_9
-rm virtio_ring_poll.o virtio_ring_poll
.PHONY: all clean
Partial implementation of various ring layouts, useful to tune virtio design.
Uses shared memory heavily.
/*
* Copyright (C) 2016 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Command line processing and common functions for ring benchmarking.
*/
#define _GNU_SOURCE
#include <getopt.h>
#include <pthread.h>
#include <assert.h>
#include <sched.h>
#include "main.h"
#include <sys/eventfd.h>
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <limits.h>
/* Number of buffers each side processes before the benchmark ends. */
int runcycles = 10000000;
/* Cap on buffers in flight (started - completed); INT_MAX = unlimited. */
int max_outstanding = INT_MAX;
/* In sleep mode, kick the host once per this many added buffers. */
int batch = 1;
/* Use eventfd-based sleep/wake instead of busy polling. */
bool do_sleep = false;
/* Use cpu_relax() inside busy-wait loops. */
bool do_relax = false;
/* Simulate VM exit/entry cost around each notification. */
bool do_exit = true;
/* Number of ring entries; main() asserts this is a power of two. */
unsigned ring_size = 256;
/* eventfds: guest->host kicks and host->guest calls. */
static int kickfd = -1;
static int callfd = -1;
/* Signal the peer by writing a counter increment to an eventfd,
 * charging simulated VM exit/entry costs around the syscall.
 */
void notify(int fd)
{
    unsigned long long val = 1;
    int rc;

    vmexit();
    rc = write(fd, &val, sizeof(val));
    assert(rc == sizeof(val));
    vmentry();
}
/* Block until the peer signals the eventfd, charging simulated VM
 * exit/entry costs around the syscall.
 */
void wait_for_notify(int fd)
{
    unsigned long long val = 1;
    int rc;

    vmexit();
    rc = read(fd, &val, sizeof(val));
    assert(rc == sizeof(val));
    vmentry();
}
/* Guest side: notify the host that new buffers are available. */
void kick(void)
{
notify(kickfd);
}
/* Host side: sleep until the guest kicks. */
void wait_for_kick(void)
{
wait_for_notify(kickfd);
}
/* Host side: notify the guest that buffers were consumed. */
void call(void)
{
notify(callfd);
}
/* Guest side: sleep until the host calls. */
void wait_for_call(void)
{
wait_for_notify(callfd);
}
/* Pin the calling thread to the CPU named by the decimal/hex string "arg".
 * A NULL argument means "no affinity requested" and is a no-op.
 * Invalid input (non-numeric, or out of [0, CPU_SETSIZE)) aborts via assert.
 */
void set_affinity(const char *arg)
{
    cpu_set_t cpuset;
    int ret;
    pthread_t self;
    long int cpu;
    char *endptr;

    if (!arg)
        return;

    cpu = strtol(arg, &endptr, 0);
    assert(!*endptr);

    /* Both bounds must hold simultaneously: the original
     * "cpu >= 0 || cpu < CPU_SETSIZE" was always true, so negative or
     * too-large CPU numbers slipped through to CPU_SET (undefined
     * behavior for out-of-range bits).
     */
    assert(cpu >= 0 && cpu < CPU_SETSIZE);

    self = pthread_self();
    CPU_ZERO(&cpuset);
    CPU_SET(cpu, &cpuset);

    ret = pthread_setaffinity_np(self, sizeof(cpu_set_t), &cpuset);
    assert(!ret);
}
/* Guest-side benchmark loop: keep the ring as full as allowed by
 * max_outstanding, reap completions, and stop after "runcycles" buffers
 * have completed. In sleep mode, interrupts (calls) are enabled only
 * while waiting; otherwise the used ring is busy-polled.
 */
static void run_guest(void)
{
int completed_before;
int completed = 0;
int started = 0;
int bufs = runcycles;
/* Wakeups that found no new completion (kept for debugging). */
int spurious = 0;
int r;
unsigned len;
void *buf;
/* Countdown until the next kick; reloaded from "batch". */
int tokick = batch;
for (;;) {
if (do_sleep)
disable_call();
completed_before = completed;
do {
/* Keep submitting while buffers remain and in-flight count
 * stays under max_outstanding.
 */
if (started < bufs &&
started - completed < max_outstanding) {
r = add_inbuf(0, NULL, "Hello, world!");
if (__builtin_expect(r == 0, true)) {
++started;
/* Kick only every "batch" submissions to amortize
 * notification cost.
 */
if (!--tokick) {
tokick = batch;
if (do_sleep)
kick_available();
}
}
} else
r = -1;
/* Flush out completed bufs if any */
if (get_buf(&len, &buf)) {
++completed;
if (__builtin_expect(completed == bufs, false))
return;
/* Freed a slot: retry submission even if the ring was full. */
r = 0;
}
} while (r == 0);
if (completed == completed_before)
++spurious;
assert(completed <= bufs);
assert(started <= bufs);
if (do_sleep) {
/* enable_call() returns true if no used entry is ready yet,
 * in which case we sleep until the host calls.
 */
if (enable_call())
wait_for_call();
} else {
poll_used();
}
}
}
/* Host-side benchmark loop: wait for available buffers (sleeping on the
 * kick eventfd or busy-polling), consume them, and optionally signal the
 * guest per consumed buffer. Stops after "runcycles" buffers.
 */
static void run_host(void)
{
int completed_before;
int completed = 0;
/* Wakeups that found no new buffer (kept for debugging). */
int spurious = 0;
int bufs = runcycles;
unsigned len;
void *buf;
for (;;) {
if (do_sleep) {
/* enable_kick() returns true if nothing is available yet,
 * in which case we sleep until the guest kicks.
 */
if (enable_kick())
wait_for_kick();
} else {
poll_avail();
}
if (do_sleep)
disable_kick();
completed_before = completed;
/* Drain everything currently available. */
while (__builtin_expect(use_buf(&len, &buf), true)) {
if (do_sleep)
call_used();
++completed;
if (__builtin_expect(completed == bufs, false))
return;
}
if (completed == completed_before)
++spurious;
assert(completed <= bufs);
if (completed == bufs)
break;
}
}
/* pthread entry point for the guest side: optionally pin to a CPU
 * (arg is the affinity string or NULL), then run the guest loop.
 * Returning NULL from a thread start routine is equivalent to
 * pthread_exit(NULL).
 */
void *start_guest(void *arg)
{
    set_affinity(arg);
    run_guest();
    return NULL;
}
/* pthread entry point for the host side: optionally pin to a CPU
 * (arg is the affinity string or NULL), then run the host loop.
 * Returning NULL from a thread start routine is equivalent to
 * pthread_exit(NULL).
 */
void *start_host(void *arg)
{
    set_affinity(arg);
    run_host();
    return NULL;
}
/* No short options: everything is long-form. */
static const char optstring[] = "";
/* Long options; .val is the value getopt_long() returns from main(). */
static const struct option longopts[] = {
{
.name = "help",
.has_arg = no_argument,
.val = 'h',
},
{
/* CPU to pin the host thread to. */
.name = "host-affinity",
.has_arg = required_argument,
.val = 'H',
},
{
/* CPU to pin the guest thread to. */
.name = "guest-affinity",
.has_arg = required_argument,
.val = 'G',
},
{
/* Ring entries; must be a power of two (checked in main). */
.name = "ring-size",
.has_arg = required_argument,
.val = 'R',
},
{
/* Number of buffers to process before exiting. */
.name = "run-cycles",
.has_arg = required_argument,
.val = 'C',
},
{
/* Max buffers in flight. */
.name = "outstanding",
.has_arg = required_argument,
.val = 'o',
},
{
/* Kick once per this many submissions. */
.name = "batch",
.has_arg = required_argument,
.val = 'b',
},
{
/* Sleep on eventfds instead of polling. */
.name = "sleep",
.has_arg = no_argument,
.val = 's',
},
{
/* Use cpu_relax() in busy loops. */
.name = "relax",
.has_arg = no_argument,
.val = 'x',
},
{
/* Simulate VM exit/entry costs (already the default). */
.name = "exit",
.has_arg = no_argument,
.val = 'e',
},
{
/* All-zero terminator required by getopt_long(). */
}
};
/* Print usage (with current defaults for ring size and cycles) to stderr. */
static void help(void)
{
    static const char usage[] =
        "Usage: <test> [--help]"
        " [--host-affinity H]"
        " [--guest-affinity G]"
        " [--ring-size R (default: %d)]"
        " [--run-cycles C (default: %d)]"
        " [--batch b]"
        " [--outstanding o]"
        " [--sleep]"
        " [--relax]"
        " [--exit]"
        "\n";

    fprintf(stderr, usage, ring_size, runcycles);
}
/* Parse options, set up the kick/call eventfds and the ring, then run the
 * host and guest loops on two threads and wait for both to finish.
 * Exits 2 on unknown option, 4 on usage errors; asserts on bad values.
 */
int main(int argc, char **argv)
{
int ret;
pthread_t host, guest;
void *tret;
char *host_arg = NULL;
char *guest_arg = NULL;
char *endptr;
long int c;
/* Notification channels shared by both threads. */
kickfd = eventfd(0, 0);
assert(kickfd >= 0);
callfd = eventfd(0, 0);
assert(callfd >= 0);
for (;;) {
int o = getopt_long(argc, argv, optstring, longopts, NULL);
switch (o) {
case -1:
goto done;
case '?':
help();
exit(2);
case 'H':
host_arg = optarg;
break;
case 'G':
guest_arg = optarg;
break;
case 'R':
ring_size = strtol(optarg, &endptr, 0);
/* Ring size must be a nonzero power of two: index masks
 * like (ring_size - 1) & idx rely on it.
 */
assert(ring_size && !(ring_size & (ring_size - 1)));
assert(!*endptr);
break;
case 'C':
c = strtol(optarg, &endptr, 0);
assert(!*endptr);
assert(c > 0 && c < INT_MAX);
runcycles = c;
break;
case 'o':
c = strtol(optarg, &endptr, 0);
assert(!*endptr);
assert(c > 0 && c < INT_MAX);
max_outstanding = c;
break;
case 'b':
c = strtol(optarg, &endptr, 0);
assert(!*endptr);
assert(c > 0 && c < INT_MAX);
batch = c;
break;
case 's':
do_sleep = true;
break;
case 'x':
do_relax = true;
break;
case 'e':
/* Already the default; kept so "--exit" is accepted. */
do_exit = true;
break;
default:
help();
exit(4);
break;
}
}
/* does nothing here, used to make sure all smp APIs compile */
smp_acquire();
smp_release();
smp_mb();
done:
/* Kicking less often than the in-flight cap allows would deadlock
 * in sleep mode, so clamp batch.
 */
if (batch > max_outstanding)
batch = max_outstanding;
if (optind < argc) {
help();
exit(4);
}
alloc_ring();
/* Affinity strings are handed to the thread entry points. */
ret = pthread_create(&host, NULL, start_host, host_arg);
assert(!ret);
ret = pthread_create(&guest, NULL, start_guest, guest_arg);
assert(!ret);
ret = pthread_join(guest, &tret);
assert(!ret);
ret = pthread_join(host, &tret);
assert(!ret);
return 0;
}
/*
* Copyright (C) 2016 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Common macros and functions for ring benchmarking.
*/
#ifndef MAIN_H
#define MAIN_H
#include <stdbool.h>
extern bool do_exit;
#if defined(__x86_64__) || defined(__i386__)
#include "x86intrin.h"
/* Busy-spin for at least "cycles" TSC ticks using the x86 timestamp
 * counter; used to model VM exit/entry latency.
 */
static inline void wait_cycles(unsigned long long cycles)
{
unsigned long long t;
t = __rdtsc();
while (__rdtsc() - t < cycles) {}
}
#define VMEXIT_CYCLES 500
#define VMENTRY_CYCLES 500
#else
/* Non-x86 fallback: no cycle counter available, so abort the benchmark
 * immediately with status 5 if this is ever called.
 */
static inline void wait_cycles(unsigned long long cycles)
{
_Exit(5);
}
#define VMEXIT_CYCLES 0
#define VMENTRY_CYCLES 0
#endif
/* Model the cost of a VM exit; no-op when exit simulation is disabled. */
static inline void vmexit(void)
{
    if (do_exit)
        wait_cycles(VMEXIT_CYCLES);
}
/* Model the cost of a VM entry; no-op when exit simulation is disabled. */
static inline void vmentry(void)
{
    if (do_exit)
        wait_cycles(VMENTRY_CYCLES);
}
/* implemented by ring */
void alloc_ring(void);
/* guest side */
/* Queue one buffer; returns 0 on success, -1 if the ring is full. */
int add_inbuf(unsigned, void *, void *);
/* Reap one completion; returns its data pointer or NULL if none ready. */
void *get_buf(unsigned *, void **);
void disable_call();
/* Arm the call (interrupt); returns true if the caller should sleep. */
bool enable_call();
void kick_available();
/* Busy-wait until a completion is ready. */
void poll_used();
/* host side */
void disable_kick();
/* Arm the kick; returns true if the caller should sleep. */
bool enable_kick();
/* Consume one available buffer; returns false if none ready. */
bool use_buf(unsigned *, void **);
void call_used();
/* Busy-wait until a buffer is available. */
void poll_avail();
/* implemented by main */
extern bool do_sleep;
void kick(void);
void wait_for_kick(void);
void call(void);
void wait_for_call(void);
extern unsigned ring_size;
/* Compiler barrier - similar to what Linux uses */
#define barrier() asm volatile("" ::: "memory")
/* Is there a portable way to do this? */
#if defined(__x86_64__) || defined(__i386__)
#define cpu_relax() asm ("rep; nop" ::: "memory")
#else
#define cpu_relax() assert(0)
#endif
extern bool do_relax;
/* One iteration of a busy loop: either PAUSE or just a compiler barrier. */
static inline void busy_wait(void)
{
if (do_relax)
cpu_relax();
else
/* prevent compiler from removing busy loops */
barrier();
}
/*
 * Not using __ATOMIC_SEQ_CST since gcc docs say they are only synchronized
 * with other __ATOMIC_SEQ_CST calls.
 */
#define smp_mb() __sync_synchronize()
/*
 * This abuses the atomic builtins for thread fences, and
 * adds a compiler barrier.
 */
#define smp_release() do { \
barrier(); \
__atomic_thread_fence(__ATOMIC_RELEASE); \
} while (0)
#define smp_acquire() do { \
__atomic_thread_fence(__ATOMIC_ACQUIRE); \
barrier(); \
} while (0)
#endif
/*
* Copyright (C) 2016 Red Hat, Inc.
* Author: Michael S. Tsirkin <mst@redhat.com>
* This work is licensed under the terms of the GNU GPL, version 2.
*
* Simple descriptor-based ring. virtio 0.9 compatible event index is used for
* signalling, unconditionally.
*/
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
/* Next - Where next entry will be written.
* Prev - "Next" value when event triggered previously.
* Event - Peer requested event after writing this entry.
*/
/* Virtio event-index test: should the producer notify the consumer?
 * True iff "event" lies in the half-open window (prev, next] of entries
 * written since the last notification. All arithmetic is modulo 2^16,
 * so index wraparound is handled naturally.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
    unsigned short since_event = next - event - 1;
    unsigned short since_prev = next - prev;

    return since_event < since_prev;
}
/* Design:
* Guest adds descriptors with unique index values and DESC_HW in flags.
* Host overwrites used descriptors with correct len, index, and DESC_HW clear.
* Flags are always set last.
*/
/* Descriptor ownership flag: set by the guest when handing the entry to
 * the host, cleared by the host once the entry is used. Flags are always
 * written last (see add_inbuf/use_buf ordering).
 */
#define DESC_HW 0x1
struct desc {
unsigned short flags;
unsigned short index;
unsigned len;
unsigned long long addr;
};
/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80
/* Mostly read */
/* Event indices each side publishes to request a notification. */
struct event {
unsigned short kick_index;
unsigned char reserved0[HOST_GUEST_PADDING - 2];
unsigned short call_index;
unsigned char reserved1[HOST_GUEST_PADDING - 2];
};
/* Per-descriptor bookkeeping kept outside the shared ring. */
struct data {
void *buf; /* descriptor is writeable, we can't get buf from there */
void *data;
} *data;
struct desc *ring;
struct event *event;
/* Guest-private state, padded so it never shares a cache line with host. */
struct guest {
unsigned avail_idx;
unsigned last_used_idx;
unsigned num_free;
unsigned kicked_avail_idx;
unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;
/* Host-private state, padded likewise. */
struct host {
/* we do not need to track last avail index
 * unless we have more than one in flight.
 */
unsigned used_idx;
unsigned called_used_idx;
unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;
/* implemented by ring */
/* Allocate and initialize the page-aligned descriptor ring, the event
 * structure, and the per-descriptor data array. Each descriptor starts
 * with its own index and cleared flags (guest-owned). Exits with
 * status 3 on any allocation failure.
 */
void alloc_ring(void)
{
    int ret;
    unsigned i;

    ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
    if (ret) {
        /* posix_memalign() reports errors via its return value and does
         * NOT set errno, so perror() would print an unrelated message;
         * report the returned code explicitly instead.
         */
        fprintf(stderr, "Unable to allocate ring buffer: %s\n",
                strerror(ret));
        exit(3);
    }
    event = malloc(sizeof *event);
    if (!event) {
        /* No trailing ".\n" in the message: perror() appends
         * ": <error>\n" itself.
         */
        perror("Unable to allocate event buffer");
        exit(3);
    }
    memset(event, 0, sizeof *event);
    guest.avail_idx = 0;
    guest.kicked_avail_idx = -1;
    guest.last_used_idx = 0;
    host.used_idx = 0;
    host.called_used_idx = -1;
    for (i = 0; i < ring_size; ++i) {
        struct desc desc = {
            .index = i,
        };
        ring[i] = desc;
    }
    guest.num_free = ring_size;
    data = malloc(ring_size * sizeof *data);
    if (!data) {
        perror("Unable to allocate data buffer");
        exit(3);
    }
    memset(data, 0, ring_size * sizeof *data);
}
/* guest side */
/* Guest: publish one buffer to the host. Returns 0 on success, -1 if the
 * ring is full. The descriptor payload is written first; the DESC_HW
 * flag that transfers ownership is written last, after a release
 * barrier, so the host never sees a half-written descriptor.
 */
int add_inbuf(unsigned len, void *buf, void *datap)
{
unsigned head, index;
if (!guest.num_free)
return -1;
guest.num_free--;
/* ring_size is a power of two, so this masks the index into range. */
head = (ring_size - 1) & (guest.avail_idx++);
/* Start with a write. On MESI architectures this helps
 * avoid a shared state with consumer that is polling this descriptor.
 */
ring[head].addr = (unsigned long)(void*)buf;
ring[head].len = len;
/* read below might bypass write above. That is OK because it's just an
 * optimization. If this happens, we will get the cache line in a
 * shared state which is unfortunate, but probably not worth it to
 * add an explicit full barrier to avoid this.
 */
barrier();
/* The host rewrites .index on completion; stash our bookkeeping under
 * whatever index this slot currently carries.
 */
index = ring[head].index;
data[index].buf = buf;
data[index].data = datap;
/* Barrier A (for pairing) */
smp_release();
/* Ownership transfer to the host happens here. */
ring[head].flags = DESC_HW;
return 0;
}
/* Guest: reap one completed buffer. Returns the opaque data pointer
 * passed to add_inbuf (and stores len/buf through the out-parameters),
 * or NULL if the next descriptor is still owned by the host. The
 * acquire barrier pairs with the host's release before clearing DESC_HW.
 */
void *get_buf(unsigned *lenp, void **bufp)
{
unsigned head = (ring_size - 1) & guest.last_used_idx;
unsigned index;
void *datap;
if (ring[head].flags & DESC_HW)
return NULL;
/* Barrier B (for pairing) */
smp_acquire();
*lenp = ring[head].len;
/* Mask defensively: the host wrote this index field. */
index = ring[head].index & (ring_size - 1);
datap = data[index].data;
*bufp = data[index].buf;
data[index].buf = NULL;
data[index].data = NULL;
guest.num_free++;
guest.last_used_idx++;
return datap;
}
/* Guest: busy-wait until the host hands back the next used descriptor
 * (i.e. until DESC_HW is cleared on the entry at last_used_idx).
 */
void poll_used(void)
{
    unsigned head = (ring_size - 1) & guest.last_used_idx;

    for (;;) {
        if (!(ring[head].flags & DESC_HW))
            break;
        busy_wait();
    }
}
/* Guest: intentionally a no-op — calls stay enabled. */
void disable_call()
{
/* Doing nothing to disable calls might cause
 * extra interrupts, but reduces the number of cache misses.
 */
}
/* Guest: publish the index we want a call (interrupt) for, then re-check
 * the ring. Returns true if the next descriptor is still host-owned
 * (caller should sleep via wait_for_call), false if a completion is
 * already available. The full barrier orders the index publish against
 * the flags re-read so the wakeup cannot be lost.
 */
bool enable_call()
{
unsigned head = (ring_size - 1) & guest.last_used_idx;
event->call_index = guest.last_used_idx;
/* Flush call index write */
/* Barrier D (for pairing) */
smp_mb();
return ring[head].flags & DESC_HW;
}
void kick_available(void)
{
/* Flush in previous flags write */
/* Barrier C (for pairing) */