Commit f3cc0d27 authored by Rob Clark

freedreno: import libdrm_freedreno + redesign submit

In the pursuit of lowering driver overhead, it became clear that some
amount of redesign of how libdrm_freedreno constructs the submit ioctl
would be needed.  In particular, as the gallium driver is starting to
make heavier use of CP_SET_DRAW_STATE state groups/objects, the over-
head of tracking cmd buffers and relocs becomes too much.  And for
"streaming" state, which isn't ever reused (like uniform uploads) the
overhead of allocating/freeing ringbuffer[1] objects is too high.

This redesign makes two main changes:

 1) Introduces a fd_submit object for tracking bos and cmds table
    for the submit ioctl, making ringbuffer objects more light-
    weight.  This was previously done in the ringbuffer.  But we
    have many ringbuffer instances involved in a submit (gmem +
    draw + potentially 1000's of state-group rbs), and only need
    a single bos and cmds table.  (Reloc table is still per-rb)

    The submit is also a convenient place for a slab allocator for
    ringbuffer objects.  Other options would have required locking
    because, while we can guarantee allocations will only happen on
    a single thread, frees could happen either on the application
    thread or the flush_queue thread.  With the slab allocator in
    the submit object, any frees that happen on the flush_queue
    thread happen after we know that the application thread is done
    with the submit.

 2) Introduce a new "softpin" msm_ringbuffer_sp implementation that
    does not use relocs and only has cmds table entries for IB1 (ie.
    the cmdstream buffers that kernel needs to CP_INDIRECT_BUFFER
    to from the RB).  To do this properly will require some updates
    on the kernel side, so whether you get the softpin or legacy
    submit/ringbuffer implementation at runtime depends on your
    kernel version.

To make all these changes in libdrm would basically require adding a
libdrm_freedreno2, so this is a good point to just pull the libdrm code
into mesa.  Plus it allows for using mesa's hashtable, slab allocator,
etc.  And it lets us have asserts enabled for debug mesa builds but
omitted for release builds.  And it makes life easier if further API
changes become necessary.

At this point I haven't tried to pull in the kgsl backend.  Although
I left the level of vfunc indirection which would make it possible
to have other backends.  (And this was convenient to keep to allow
for the "softpin" ringbuffer to coexist.)

NOTE: if bisecting a build error takes you here, try a clean build.
There are a bunch of ways things can go wrong if you still have
libdrm_freedreno cflags.

[1] "ringbuffer" is probably a bad name, the only level of cmdstream
    buffer that is actually a ring is RB managed by kernel.  User-
    space cmdstream is all IB1/IB2 and state-groups.
Reviewed-by: Kristian H. Kristensen <hoegsberg@chromium.org>
Reviewed-by: Eric Engestrom <eric.engestrom@intel.com>
Signed-off-by: Rob Clark <robdclark@gmail.com>
parent aa02d7e8
......@@ -78,7 +78,6 @@ LIBDRM_AMDGPU_REQUIRED=2.4.95
LIBDRM_INTEL_REQUIRED=2.4.75
LIBDRM_NVVIEUX_REQUIRED=2.4.66
LIBDRM_NOUVEAU_REQUIRED=2.4.66
LIBDRM_FREEDRENO_REQUIRED=2.4.96
LIBDRM_ETNAVIV_REQUIRED=2.4.89
LIBDRM_VC4_REQUIRED=2.4.89
......@@ -2722,7 +2721,6 @@ if test -n "$with_gallium_drivers"; then
;;
xfreedreno)
HAVE_GALLIUM_FREEDRENO=yes
PKG_CHECK_MODULES([FREEDRENO], [libdrm >= $LIBDRM_FREEDRENO_REQUIRED libdrm_freedreno >= $LIBDRM_FREEDRENO_REQUIRED])
require_libdrm "freedreno"
;;
xetnaviv)
......
......@@ -1099,14 +1099,12 @@ dep_libdrm_amdgpu = null_dep
dep_libdrm_radeon = null_dep
dep_libdrm_nouveau = null_dep
dep_libdrm_etnaviv = null_dep
dep_libdrm_freedreno = null_dep
dep_libdrm_intel = null_dep
_drm_amdgpu_ver = '2.4.95'
_drm_radeon_ver = '2.4.71'
_drm_nouveau_ver = '2.4.66'
_drm_etnaviv_ver = '2.4.89'
_drm_freedreno_ver = '2.4.96'
_drm_intel_ver = '2.4.75'
_drm_ver = '2.4.75'
......@@ -1117,7 +1115,6 @@ _libdrm_checks = [
with_gallium_r300 or with_gallium_r600)],
['nouveau', (with_gallium_nouveau or with_dri_nouveau)],
['etnaviv', with_gallium_etnaviv],
['freedreno', with_gallium_freedreno],
]
# VC4 only needs core libdrm support of this version, not a libdrm_vc4
......
......@@ -27,6 +27,7 @@ include $(CLEAR_VARS)
LOCAL_SRC_FILES := \
$(C_SOURCES) \
$(drm_SOURCES) \
$(a2xx_SOURCES) \
$(a3xx_SOURCES) \
$(a4xx_SOURCES) \
......@@ -42,7 +43,7 @@ LOCAL_C_INCLUDES := \
LOCAL_GENERATED_SOURCES := $(MESA_GEN_NIR_H)
LOCAL_SHARED_LIBRARIES := libdrm_freedreno
LOCAL_SHARED_LIBRARIES := libdrm
LOCAL_STATIC_LIBRARIES := libmesa_glsl libmesa_nir
LOCAL_MODULE := libmesa_pipe_freedreno
......
......@@ -7,7 +7,8 @@ AM_CFLAGS = \
-I$(top_builddir)/src/compiler/nir \
-I$(top_srcdir)/src/compiler/nir \
$(GALLIUM_DRIVER_CFLAGS) \
$(FREEDRENO_CFLAGS)
$(LIBDRM_CFLAGS) \
$(VALGRIND_CFLAGS)
MKDIR_GEN = $(AM_V_at)$(MKDIR_P) $(@D)
ir3/ir3_nir_trig.c: ir3/ir3_nir_trig.py $(top_srcdir)/src/compiler/nir/nir_algebraic.py
......@@ -18,6 +19,7 @@ noinst_LTLIBRARIES = libfreedreno.la
libfreedreno_la_SOURCES = \
$(C_SOURCES) \
$(drm_SOURCES) \
$(a2xx_SOURCES) \
$(a3xx_SOURCES) \
$(a4xx_SOURCES) \
......@@ -45,6 +47,7 @@ ir3_compiler_LDADD = \
$(top_builddir)/src/util/libmesautil.la \
$(top_builddir)/src/mesa/libmesagallium.la \
$(GALLIUM_COMMON_LIB_DEPS) \
$(FREEDRENO_LIBS)
$(LIBDRM_LIBS) \
$(VALGRIND_LIBS)
EXTRA_DIST += meson.build
......@@ -40,6 +40,23 @@ C_SOURCES := \
freedreno_util.c \
freedreno_util.h
drm_SOURCES := \
drm/freedreno_bo.c \
drm/freedreno_bo_cache.c \
drm/freedreno_device.c \
drm/freedreno_drmif.h \
drm/freedreno_pipe.c \
drm/freedreno_priv.h \
drm/freedreno_ringbuffer.c \
drm/freedreno_ringbuffer.h \
drm/msm_bo.c \
drm/msm_device.c \
drm/msm_drm.h \
drm/msm_pipe.c \
drm/msm_priv.h \
drm/msm_ringbuffer.c \
drm/msm_ringbuffer_sp.c
a2xx_SOURCES := \
a2xx/a2xx.xml.h \
a2xx/disasm-a2xx.c \
......
......@@ -29,8 +29,6 @@
#include "util/u_upload_mgr.h"
#include "freedreno_drmif.h"
#include "freedreno_context.h"
#include "ir3_shader.h"
......
......@@ -29,8 +29,6 @@
#include "util/u_upload_mgr.h"
#include "freedreno_drmif.h"
#include "freedreno_context.h"
#include "ir3_shader.h"
......
......@@ -29,8 +29,6 @@
#include "util/u_upload_mgr.h"
#include "freedreno_drmif.h"
#include "freedreno_context.h"
#include "ir3_shader.h"
......
......@@ -197,8 +197,7 @@ fd5_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
// draw
if (!batch->lrz_clear) {
batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000);
fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem);
batch->lrz_clear = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
}
ring = batch->lrz_clear;
......
......@@ -30,8 +30,6 @@
#include "util/u_upload_mgr.h"
#include "freedreno_drmif.h"
#include "freedreno_context.h"
#include "ir3_shader.h"
......
......@@ -297,8 +297,7 @@ fd6_clear_lrz(struct fd_batch *batch, struct fd_resource *zsbuf, double depth)
// draw
if (!batch->lrz_clear) {
batch->lrz_clear = fd_ringbuffer_new(batch->ctx->pipe, 0x1000);
fd_ringbuffer_set_parent(batch->lrz_clear, batch->gmem);
batch->lrz_clear = fd_submit_new_ringbuffer(batch->submit, 0x1000, 0);
}
ring = batch->lrz_clear;
......
......@@ -359,8 +359,7 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
if (tex->num_samplers > 0) {
struct fd_ringbuffer *state =
fd_ringbuffer_new_flags(pipe, tex->num_samplers * 4 * 4,
FD_RINGBUFFER_OBJECT);
fd_ringbuffer_new_object(pipe, tex->num_samplers * 4 * 4);
for (unsigned i = 0; i < tex->num_samplers; i++) {
static const struct fd6_sampler_stateobj dummy_sampler = {};
const struct fd6_sampler_stateobj *sampler = tex->samplers[i] ?
......@@ -390,8 +389,7 @@ fd6_emit_textures(struct fd_pipe *pipe, struct fd_ringbuffer *ring,
if (tex->num_textures > 0) {
struct fd_ringbuffer *state =
fd_ringbuffer_new_flags(pipe, tex->num_textures * 16 * 4,
FD_RINGBUFFER_OBJECT);
fd_ringbuffer_new_object(pipe, tex->num_textures * 16 * 4);
for (unsigned i = 0; i < tex->num_textures; i++) {
static const struct fd6_pipe_sampler_view dummy_view = {};
const struct fd6_pipe_sampler_view *view = tex->textures[i] ?
......@@ -534,9 +532,8 @@ fd6_build_vbo_state(struct fd6_emit *emit, const struct ir3_shader_variant *vp)
const struct fd_vertex_state *vtx = emit->vtx;
int32_t i, j;
struct fd_ringbuffer *ring =
fd_ringbuffer_new_flags(emit->ctx->pipe, 4 * (10 * vp->inputs_count + 2),
FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
4 * (10 * vp->inputs_count + 2), FD_RINGBUFFER_STREAMING);
for (i = 0, j = 0; i <= vp->inputs_count; i++) {
if (vp->inputs[i].sysval)
......@@ -597,9 +594,8 @@ build_zsa(struct fd6_emit *emit, bool binning_pass)
uint32_t gras_lrz_cntl = zsa->gras_lrz_cntl;
uint32_t rb_lrz_cntl = zsa->rb_lrz_cntl;
struct fd_ringbuffer *ring =
fd_ringbuffer_new_flags(emit->ctx->pipe, 16,
FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(emit->ctx->batch->submit,
16, FD_RINGBUFFER_STREAMING);
if (emit->no_lrz_write || !rsc->lrz || !rsc->lrz_valid) {
gras_lrz_cntl = 0;
......@@ -786,9 +782,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
FD_DIRTY_SHADER_SSBO | FD_DIRTY_SHADER_IMAGE)
if (ctx->dirty_shader[PIPE_SHADER_VERTEX] & DIRTY_CONST) {
struct fd_ringbuffer *vsconstobj =
fd_ringbuffer_new_flags(ctx->pipe, 0x1000,
FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
struct fd_ringbuffer *vsconstobj = fd_submit_new_ringbuffer(
ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
ir3_emit_vs_consts(vp, vsconstobj, ctx, emit->info);
fd6_emit_add_group(emit, vsconstobj, FD6_GROUP_VS_CONST, 0x7);
......@@ -796,9 +791,8 @@ fd6_emit_state(struct fd_ringbuffer *ring, struct fd6_emit *emit)
}
if (ctx->dirty_shader[PIPE_SHADER_FRAGMENT] & DIRTY_CONST) {
struct fd_ringbuffer *fsconstobj =
fd_ringbuffer_new_flags(ctx->pipe, 0x1000,
FD_RINGBUFFER_OBJECT | FD_RINGBUFFER_STREAMING);
struct fd_ringbuffer *fsconstobj = fd_submit_new_ringbuffer(
ctx->batch->submit, 0x1000, FD_RINGBUFFER_STREAMING);
ir3_emit_fs_consts(fp, fsconstobj, ctx);
fd6_emit_add_group(emit, fsconstobj, FD6_GROUP_FS_CONST, 0x6);
......
/*
* Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "os/os_mman.h"
#include "freedreno_drmif.h"
#include "freedreno_priv.h"
/* Global mutex.  Helpers in this file that are commented "call w/
 * table_lock held" (handle/name table access, bo_del) expect their
 * callers to have taken it.
 */
pthread_mutex_t table_lock = PTHREAD_MUTEX_INITIALIZER;
/* Forward declaration of the non-static bo teardown routine. */
void bo_del(struct fd_bo *bo);
/*
 * Store 'name' on the bo and insert the bo into its device's name table,
 * keyed by the address of bo->name.  Call w/ table_lock held.
 */
static void set_name(struct fd_bo *bo, uint32_t name)
{
	struct hash_table *names = bo->dev->name_table;

	bo->name = name;

	/* add ourself into the name table: */
	_mesa_hash_table_insert(names, &bo->name, bo);
}
/*
 * Search 'tbl' for 'key'.  On a hit, take an additional reference on the
 * bo and unlink it from whatever list it is currently on, then return it.
 * On a miss, return NULL.  Call w/ table_lock held.
 */
static struct fd_bo * lookup_bo(struct hash_table *tbl, uint32_t key)
{
	struct hash_entry *hit = _mesa_hash_table_search(tbl, &key);
	struct fd_bo *found;

	if (!hit)
		return NULL;

	/* found, incr refcnt and return: */
	found = fd_bo_ref(hit->data);

	/* don't break the bucket if this bo was found in one */
	list_delinit(&found->list);

	return found;
}
/* allocate a new buffer object, call w/ table_lock held */
static struct fd_bo * bo_from_handle(struct fd_device *dev,
uint32_t size, uint32_t handle)
{
struct fd_bo *bo;
bo = dev->funcs->bo_from_handle(dev, size, handle);
if (!bo) {
struct drm_gem_close req = {
.handle = handle,
};
drmIoctl(dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
return NULL;
}
bo->dev = fd_device_ref(dev);
bo->size = size;
bo->handle = handle;
p_atomic_set(&bo->refcnt, 1);
list_inithead(&bo->list);
/* add ourself into the handle table: */
_mesa_hash_table_insert(dev->handle_table, &bo->handle, bo);
return bo;
}
/*
 * Shared allocation path for fd_bo_new() and fd_bo_new_ring(): first try
 * to get a bo from 'cache'; if that yields nothing, ask the backend for a
 * new handle and wrap it (taking table_lock around the wrap).
 *
 * Returns NULL if the backend cannot create a handle or the wrap fails.
 */
static struct fd_bo *
bo_new(struct fd_device *dev, uint32_t size, uint32_t flags,
		struct fd_bo_cache *cache)
{
	uint32_t handle;
	struct fd_bo *bo;

	/* size is passed by pointer -- presumably so the cache can adjust
	 * it to the bucket size; verify against fd_bo_cache_alloc().
	 */
	bo = fd_bo_cache_alloc(cache, &size, flags);
	if (bo != NULL)
		return bo;

	if (dev->funcs->bo_new_handle(dev, size, flags, &handle) != 0)
		return NULL;

	pthread_mutex_lock(&table_lock);
	bo = bo_from_handle(dev, size, handle);
	pthread_mutex_unlock(&table_lock);

	VG_BO_ALLOC(bo);

	return bo;
}
/*
 * Allocate a bo using the device's bo_cache.  On success the bo is
 * tagged BO_CACHE; on failure NULL is returned.
 */
struct fd_bo *
fd_bo_new(struct fd_device *dev, uint32_t size, uint32_t flags)
{
	struct fd_bo *bo = bo_new(dev, size, flags, &dev->bo_cache);

	if (!bo)
		return NULL;

	bo->bo_reuse = BO_CACHE;
	return bo;
}
/*
 * Internal allocator for cmdstream bo's, which draws from the device's
 * ring_cache rather than the normal bo_cache.  Cmdstream bo's get vmap'd
 * on the kernel side, which is expensive, so we want them to be re-used
 * for cmdstream and not for unrelated purposes.
 *
 * On success the bo is tagged RING_CACHE; on failure NULL is returned.
 */
struct fd_bo *
fd_bo_new_ring(struct fd_device *dev, uint32_t size, uint32_t flags)
{
	struct fd_bo *bo = bo_new(dev, size, flags, &dev->ring_cache);

	if (!bo)
		return NULL;

	bo->bo_reuse = RING_CACHE;
	return bo;
}
/*
 * Get a bo for an existing GEM handle.  If the handle is already in the
 * device handle table, the existing bo is returned with an extra
 * reference; otherwise a new bo of the given size is created around it.
 * May return NULL if creating the new bo fails.
 */
struct fd_bo *
fd_bo_from_handle(struct fd_device *dev, uint32_t handle, uint32_t size)
{
	struct fd_bo *bo;

	pthread_mutex_lock(&table_lock);

	bo = lookup_bo(dev->handle_table, handle);
	if (!bo) {
		bo = bo_from_handle(dev, size, handle);
		VG_BO_ALLOC(bo);
	}

	pthread_mutex_unlock(&table_lock);

	return bo;
}
struct fd_bo *
fd_bo_from_dmabuf(struct fd_device *dev, int fd)
{
int ret, size;
uint32_t handle;
struct fd_bo *bo;
pthread_mutex_lock(&table_lock);
ret = drmPrimeFDToHandle(dev->fd, fd, &handle);
if (ret) {
pthread_mutex_unlock(&table_lock);
return NULL;
}
bo = lookup_bo(dev->handle_table, handle);
if (bo)
goto out_unlock;
/* lseek() to get bo size */
size = lseek(fd, 0, SEEK_END);
lseek(fd, 0, SEEK_CUR);
bo = bo_from_handle(dev, size, handle);
VG_BO_ALLOC(bo);
out_unlock:
pthread_mutex_unlock(&table_lock);
return bo;
}
struct fd_bo * fd_bo_from_name(struct fd_device *dev, uint32_t name)
{
struct drm_gem_open req = {
.name = name,
};
struct fd_bo *bo;
pthread_mutex_lock(&table_lock);
/* check name table first, to see if bo is already open: */
bo = lookup_bo(dev->name_table, name);
if (bo)
goto out_unlock;
if (drmIoctl(dev->fd, DRM_IOCTL_GEM_OPEN, &req)) {
ERROR_MSG("gem-open failed: %s", strerror(errno));
goto out_unlock;
}
bo = lookup_bo(dev->handle_table, req.handle);
if (bo)
goto out_unlock;
bo = bo_from_handle(dev, req.size, req.handle);
if (bo) {
set_name(bo, name);
VG_BO_ALLOC(bo);
}
out_unlock:
pthread_mutex_unlock(&table_lock);
return bo;
}
/*
 * Return the bo's iova.  The value is cached in bo->iova; while the cached
 * value is zero the backend's iova hook is called to fill it in.
 */
uint64_t fd_bo_get_iova(struct fd_bo *bo)
{
	if (bo->iova)
		return bo->iova;

	bo->iova = bo->funcs->iova(bo);

	return bo->iova;
}
/* Counterpart to fd_bo_get_iova(); currently a no-op. */
void
fd_bo_put_iova(struct fd_bo *bo)
{
}
/*
 * Take an additional reference on the bo (atomic increment of its
 * refcnt) and return the same pointer for convenient chaining.
 */
struct fd_bo *
fd_bo_ref(struct fd_bo *bo)
{
	p_atomic_inc(&bo->refcnt);

	return bo;
}
/*
 * Drop one reference on the bo.  When the count reaches zero the bo is,
 * under table_lock, either handed back to the cache matching its
 * bo_reuse tag, or -- if it is not cacheable or the cache declines it --
 * destroyed via bo_del() followed by fd_device_del_locked() on its device.
 */
void fd_bo_del(struct fd_bo *bo)
{
	/* grab the device up front; bo may be gone by the time we need it */
	struct fd_device *dev = bo->dev;
	int recycled;

	if (!atomic_dec_and_test(&bo->refcnt))
		return;

	pthread_mutex_lock(&table_lock);

	recycled = (bo->bo_reuse == BO_CACHE) &&
			(fd_bo_cache_free(&dev->bo_cache, bo) == 0);

	if (!recycled) {
		recycled = (bo->bo_reuse == RING_CACHE) &&
				(fd_bo_cache_free(&dev->ring_cache, bo) == 0);
	}

	if (!recycled) {
		bo_del(bo);
		fd_device_del_locked(dev);
	}

	pthread_mutex_unlock(&table_lock);
}
/* Called under table_lock */
void bo_del(struct fd_bo *bo)
{
VG_BO_FREE(bo);
if (bo->map)
os_munmap(bo->map, bo->size);
/* TODO probably bo's in bucket list get removed from
* handle table??
*/
if (bo->handle) {
struct drm_gem_close req = {
.handle = bo->handle,
};
_mesa_hash_table_remove_key(bo->dev->handle_table, &bo->handle);
if (bo->name)
_mesa_hash_table_remove_key(bo->dev->name_table, &bo->name);
drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_CLOSE, &req);
}
bo->funcs->destroy(bo);
}
int fd_bo_get_name(struct fd_bo *bo, uint32_t *name)
{
if (!bo->name) {
struct drm_gem_flink req = {
.handle = bo->handle,
};
int ret;
ret = drmIoctl(bo->dev->fd, DRM_IOCTL_GEM_FLINK, &req);
if (ret) {
return ret;
}
pthread_mutex_lock(&table_lock);
set_name(bo, req.name);
pthread_mutex_unlock(&table_lock);
bo->bo_reuse = NO_CACHE;
}
*name = bo->name;
return 0;
}
/* Accessor: return the GEM handle stored on the bo. */
uint32_t
fd_bo_handle(struct fd_bo *bo)
{
	uint32_t handle = bo->handle;

	return handle;
}
/*
 * Export the bo as a dma-buf file descriptor (opened with DRM_CLOEXEC).
 *
 * On success the bo is marked NO_CACHE and the new fd is returned.
 * On failure an error is logged and the drmPrimeHandleToFD() result is
 * returned instead.
 */
int fd_bo_dmabuf(struct fd_bo *bo)
{
	int prime_fd;
	int err = drmPrimeHandleToFD(bo->dev->fd, bo->handle, DRM_CLOEXEC,
			&prime_fd);

	if (err != 0) {
		ERROR_MSG("failed to get dmabuf fd: %d", err);
		return err;
	}

	bo->bo_reuse = NO_CACHE;

	return prime_fd;
}
/* Accessor: return the size recorded on the bo. */
uint32_t
fd_bo_size(struct fd_bo *bo)
{
	uint32_t size = bo->size;

	return size;
}
/*
 * Return a CPU mapping of the bo, creating it on first use.
 *
 * The mapping is cached in bo->map.  To create it, the backend's offset
 * hook supplies the mmap offset and the bo is mapped read/write, shared,
 * through the device fd.
 *
 * Returns NULL if the offset hook fails or the mmap fails (the latter is
 * logged); in both cases bo->map is left NULL so a later call retries.
 */
void * fd_bo_map(struct fd_bo *bo)
{
	uint64_t offset;
	void *ptr;

	if (bo->map)
		return bo->map;

	if (bo->funcs->offset(bo, &offset))
		return NULL;

	ptr = os_mmap(0, bo->size, PROT_READ | PROT_WRITE, MAP_SHARED,
			bo->dev->fd, offset);
	if (ptr == MAP_FAILED) {
		ERROR_MSG("mmap failed: %s", strerror(errno));
		ptr = NULL;
	}

	bo->map = ptr;

	return bo->map;
}
/*
 * Forward to the backend's cpu_prep hook and return its result.
 *
 * (a bit odd to take the pipe as an arg, but it's a, umm, quirk of kgsl..)
 */
int
fd_bo_cpu_prep(struct fd_bo *bo, struct fd_pipe *pipe, uint32_t op)
{
	int ret = bo->funcs->cpu_prep(bo, pipe, op);

	return ret;
}
/* Forward to the backend's cpu_fini hook. */
void
fd_bo_cpu_fini(struct fd_bo *bo)
{
	bo->funcs->cpu_fini(bo);
}
/*
* Copyright (C) 2012-2018 Rob Clark <robclark@freedesktop.org>
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*
* Authors:
* Rob Clark <robclark@freedesktop.org>
*/
#include "freedreno_drmif.h"
#include "freedreno_priv.h"
/* bo teardown routine, defined outside this file; used here when evicting
 * cached bo's.
 */
void bo_del(struct fd_bo *bo);
/* Shared lock, defined outside this file; fd_bo_cache_cleanup() is
 * documented as being called with it held.
 */
extern pthread_mutex_t table_lock;
static void
add_bucket(struct fd_bo_cache *cache, int size)
{
unsigned int i = cache->num_buckets;
assert(i < ARRAY_SIZE(cache->cache_bucket));
list_inithead(&cache->cache_bucket[i].list);
cache->cache_bucket[i].size = size;
cache->num_buckets++;
}
/**
 * Populate the cache's bucket table.
 *
 * Buckets are added in increasing size: 4K, 8K, (12K unless coarse), then
 * every power of two from 16K up to 64M.  Unless coarse, each power of
 * two is followed by three intermediate sizes at +1/4, +2/4 and +3/4.
 *
 * @coarse: if true, only power-of-two bucket sizes, otherwise
 * fill in for a bit smoother size curve..
 */
void
fd_bo_cache_init(struct fd_bo_cache *cache, int coarse)
{
	const unsigned long page = 4096;
	const unsigned long cache_max_size = 64 * 1024 * 1024;
	unsigned long size;

	/* OK, so power of two buckets was too wasteful of memory.
	 * Give 3 other sizes between each power of two, to hopefully
	 * cover things accurately enough.  (The alternative is
	 * probably to just go for exact matching of sizes, and assume
	 * that for things like composited window resize the tiled
	 * width/height alignment and rounding of sizes to pages will
	 * get us useful cache hit rates anyway)
	 */
	add_bucket(cache, page);
	add_bucket(cache, page * 2);
	if (!coarse)
		add_bucket(cache, page * 3);

	/* Initialize the linked lists for BO reuse cache. */
	size = 4 * page;
	while (size <= cache_max_size) {
		add_bucket(cache, size);

		if (!coarse) {
			/* size is a power of two >= 16K, so size / 4 is exact */
			unsigned long quarter = size / 4;
			unsigned int k;

			for (k = 1; k <= 3; k++)
				add_bucket(cache, size + k * quarter);
		}

		size *= 2;
	}
}
/*
 * Frees older cached buffers.  Called under table_lock.
 *
 * Does nothing if the cache was already cleaned at this 'time'.  Otherwise
 * each bucket is drained from the head until it is empty or a bo is found
 * that was freed within the last second.  Passing time == 0 disables the
 * age check, so every cached bo is destroyed.
 */
void
fd_bo_cache_cleanup(struct fd_bo_cache *cache, time_t time)
{
	int i;

	if (cache->time == time)
		return;

	for (i = 0; i < cache->num_buckets; i++) {
		struct fd_bo_bucket *bucket = &cache->cache_bucket[i];

		for (;;) {
			struct fd_bo *victim;

			if (LIST_IS_EMPTY(&bucket->list))
				break;

			victim = LIST_ENTRY(struct fd_bo, bucket->list.next, list);

			/* keep things in cache for at least 1 second: */
			if ((time != 0) && ((time - victim->free_time) <= 1))
				break;

			VG_BO_OBTAIN(victim);
			list_del(&victim->list);
			bo_del(victim);
		}
	}

	cache->time = time;
}
static struct fd_bo_bucket * get_bucket(struct fd_bo_cache *cache, uint32_t size)
{
int i;
/* hmm, this is what intel does, but I suppose we could calculate our
* way to the correct bucket size rather than looping..
*/
for (i = 0; i < cache->num_buckets; i++) {
struct fd_bo_bucket *bucket = &cache->cache_bucket[i];