Commit c495c0ad authored by Nicolai Hähnle

radeonsi: implement set_shader_buffers

Reviewed-by: Marek Olšák <marek.olsak@amd.com>
Reviewed-by: Edward O'Callaghan <eocallaghan@alterapraxis.com>
parent 73c8b85b
......@@ -746,6 +746,55 @@ static void si_set_constant_buffer(struct pipe_context *ctx, uint shader, uint s
buffers->desc.list_dirty = true;
}
/* SHADER BUFFERS */
/*
 * Bind or unbind a contiguous range of shader buffer slots.
 *
 * ctx        - pipe context; cast to the si_context that contains it
 * shader     - index selecting sctx->shader_buffers[shader]
 * start_slot - first slot to update
 * count      - number of consecutive slots to update
 * sbuffers   - array of `count` bindings, or NULL; a NULL array or an
 *              entry whose `buffer` is NULL unbinds the corresponding slot
 *
 * Each slot owns four dwords in buffers->desc.list. The descriptor list is
 * flagged dirty on every call, including when count is 0.
 */
static void si_set_shader_buffers(struct pipe_context *ctx, unsigned shader,
				  unsigned start_slot, unsigned count,
				  struct pipe_shader_buffer *sbuffers)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct si_buffer_resources *buffers = &sctx->shader_buffers[shader];
	unsigned i;

	assert(start_slot + count <= SI_NUM_SHADER_BUFFERS);

	for (i = 0; i < count; ++i) {
		unsigned slot = start_slot + i;
		uint32_t *desc = &buffers->desc.list[slot * 4];
		struct pipe_shader_buffer *sbuffer = NULL;

		if (sbuffers)
			sbuffer = &sbuffers[i];

		if (sbuffer && sbuffer->buffer) {
			struct r600_resource *buf =
				(struct r600_resource *)sbuffer->buffer;
			uint64_t va = buf->gpu_address + sbuffer->buffer_offset;

			/* Dword 0 keeps only the low 32 bits of the address
			 * (implicit truncation); the upper bits go in dword 1. */
			desc[0] = va;
			desc[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) |
				  S_008F04_STRIDE(0);
			desc[2] = sbuffer->buffer_size;
			desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) |
				  S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
				  S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) |
				  S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
				  S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
				  S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);

			/* Take a reference for the slot, then register the
			 * buffer with the gfx command stream's buffer list. */
			pipe_resource_reference(&buffers->buffers[slot], &buf->b.b);
			radeon_add_to_buffer_list(&sctx->b, &sctx->b.gfx, buf,
						  buffers->shader_usage,
						  buffers->priority);
			buffers->desc.enabled_mask |= 1llu << slot;
		} else {
			/* Unbind: drop the reference, zero the slot's four
			 * descriptor dwords and clear its enabled bit. */
			pipe_resource_reference(&buffers->buffers[slot], NULL);
			memset(desc, 0, 4 * sizeof(uint32_t));
			buffers->desc.enabled_mask &= ~(1llu << slot);
		}
	}

	buffers->desc.list_dirty = true;
}
/* RING BUFFERS */
void si_set_ring_buffer(struct pipe_context *ctx, uint shader, uint slot,
......@@ -1078,10 +1127,12 @@ static void si_invalidate_buffer(struct pipe_context *ctx, struct pipe_resource
}
}
/* Constant buffers. */
/* Constant and shader buffers. */
for (shader = 0; shader < SI_NUM_SHADERS; shader++) {
si_reset_buffer_resources(sctx, &sctx->const_buffers[shader],
buf, old_va);
si_reset_buffer_resources(sctx, &sctx->shader_buffers[shader],
buf, old_va);
}
/* Texture buffers - update virtual addresses in sampler view descriptors. */
......@@ -1261,6 +1312,7 @@ void si_emit_shader_userdata(struct si_context *sctx, struct r600_atom *atom)
si_emit_shader_pointer(sctx, &sctx->rw_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->const_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->shader_buffers[i].desc, base, false);
si_emit_shader_pointer(sctx, &sctx->samplers[i].views.desc, base, false);
si_emit_shader_pointer(sctx, &sctx->images[i].desc, base, false);
}
......@@ -1280,6 +1332,9 @@ void si_init_all_descriptors(struct si_context *sctx)
si_init_buffer_resources(&sctx->rw_buffers[i],
SI_NUM_RW_BUFFERS, SI_SGPR_RW_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_RINGS_STREAMOUT);
si_init_buffer_resources(&sctx->shader_buffers[i],
SI_NUM_SHADER_BUFFERS, SI_SGPR_SHADER_BUFFERS,
RADEON_USAGE_READWRITE, RADEON_PRIO_SHADER_RW_BUFFER);
si_init_descriptors(&sctx->samplers[i].views.desc,
SI_SGPR_SAMPLERS, 16, SI_NUM_SAMPLERS,
......@@ -1297,6 +1352,7 @@ void si_init_all_descriptors(struct si_context *sctx)
sctx->b.b.bind_sampler_states = si_bind_sampler_states;
sctx->b.b.set_shader_images = si_set_shader_images;
sctx->b.b.set_constant_buffer = si_set_constant_buffer;
sctx->b.b.set_shader_buffers = si_set_shader_buffers;
sctx->b.b.set_sampler_views = si_set_sampler_views;
sctx->b.b.set_stream_output_targets = si_set_streamout_targets;
sctx->b.invalidate_buffer = si_invalidate_buffer;
......@@ -1319,6 +1375,7 @@ bool si_upload_shader_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
if (!si_upload_descriptors(sctx, &sctx->const_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->rw_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->shader_buffers[i].desc) ||
!si_upload_descriptors(sctx, &sctx->samplers[i].views.desc) ||
!si_upload_descriptors(sctx, &sctx->images[i].desc))
return false;
......@@ -1333,6 +1390,7 @@ void si_release_all_descriptors(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_release_buffer_resources(&sctx->const_buffers[i]);
si_release_buffer_resources(&sctx->rw_buffers[i]);
si_release_buffer_resources(&sctx->shader_buffers[i]);
si_release_sampler_views(&sctx->samplers[i].views);
si_release_image_views(&sctx->images[i]);
}
......@@ -1346,6 +1404,7 @@ void si_all_descriptors_begin_new_cs(struct si_context *sctx)
for (i = 0; i < SI_NUM_SHADERS; i++) {
si_buffer_resources_begin_new_cs(sctx, &sctx->const_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->rw_buffers[i]);
si_buffer_resources_begin_new_cs(sctx, &sctx->shader_buffers[i]);
si_sampler_views_begin_new_cs(sctx, &sctx->samplers[i].views);
si_image_views_begin_new_cs(sctx, &sctx->images[i]);
}
......
......@@ -241,6 +241,7 @@ struct si_context {
struct si_descriptors vertex_buffers;
struct si_buffer_resources const_buffers[SI_NUM_SHADERS];
struct si_buffer_resources rw_buffers[SI_NUM_SHADERS];
struct si_buffer_resources shader_buffers[SI_NUM_SHADERS];
struct si_textures_info samplers[SI_NUM_SHADERS];
struct si_images_info images[SI_NUM_SHADERS];
......
......@@ -4450,7 +4450,8 @@ static void create_function(struct si_shader_context *ctx)
params[SI_PARAM_CONST_BUFFERS] = const_array(ctx->v16i8, SI_NUM_CONST_BUFFERS);
params[SI_PARAM_SAMPLERS] = const_array(ctx->v8i32, SI_NUM_SAMPLERS);
params[SI_PARAM_IMAGES] = const_array(ctx->v8i32, SI_NUM_IMAGES);
last_array_pointer = SI_PARAM_IMAGES;
params[SI_PARAM_SHADER_BUFFERS] = const_array(ctx->v4i32, SI_NUM_SHADER_BUFFERS);
last_array_pointer = SI_PARAM_SHADER_BUFFERS;
switch (ctx->type) {
case TGSI_PROCESSOR_VERTEX:
......@@ -6034,6 +6035,7 @@ static bool si_compile_tcs_epilog(struct si_screen *sscreen,
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
params[SI_PARAM_TCS_OUT_OFFSETS] = ctx.i32;
params[SI_PARAM_TCS_OUT_LAYOUT] = ctx.i32;
params[SI_PARAM_TCS_IN_LAYOUT] = ctx.i32;
......@@ -6284,6 +6286,7 @@ static bool si_compile_ps_epilog(struct si_screen *sscreen,
params[SI_PARAM_CONST_BUFFERS] = ctx.i64;
params[SI_PARAM_SAMPLERS] = ctx.i64;
params[SI_PARAM_IMAGES] = ctx.i64;
params[SI_PARAM_SHADER_BUFFERS] = ctx.i64;
params[SI_PARAM_ALPHA_REF] = ctx.f32;
last_array_pointer = -1;
last_sgpr = SI_PARAM_ALPHA_REF;
......
......@@ -81,95 +81,97 @@ struct radeon_shader_reloc;
#define SI_SGPR_CONST_BUFFERS 2
#define SI_SGPR_SAMPLERS 4 /* images & sampler states interleaved */
#define SI_SGPR_IMAGES 6
#define SI_SGPR_VERTEX_BUFFERS 8 /* VS only */
#define SI_SGPR_BASE_VERTEX 10 /* VS only */
#define SI_SGPR_START_INSTANCE 11 /* VS only */
#define SI_SGPR_VS_STATE_BITS 12 /* VS(VS) only */
#define SI_SGPR_LS_OUT_LAYOUT 12 /* VS(LS) only */
#define SI_SGPR_TCS_OUT_OFFSETS 8 /* TCS & TES only */
#define SI_SGPR_TCS_OUT_LAYOUT 9 /* TCS & TES only */
#define SI_SGPR_TCS_IN_LAYOUT 10 /* TCS only */
#define SI_SGPR_ALPHA_REF 8 /* PS only */
#define SI_VS_NUM_USER_SGPR 13 /* API VS */
#define SI_ES_NUM_USER_SGPR 12 /* API VS */
#define SI_LS_NUM_USER_SGPR 13 /* API VS */
#define SI_TCS_NUM_USER_SGPR 11
#define SI_TES_NUM_USER_SGPR 10
#define SI_GS_NUM_USER_SGPR 8
#define SI_SGPR_SHADER_BUFFERS 8
#define SI_SGPR_VERTEX_BUFFERS 10 /* VS only */
#define SI_SGPR_BASE_VERTEX 12 /* VS only */
#define SI_SGPR_START_INSTANCE 13 /* VS only */
#define SI_SGPR_VS_STATE_BITS 14 /* VS(VS) only */
#define SI_SGPR_LS_OUT_LAYOUT 14 /* VS(LS) only */
#define SI_SGPR_TCS_OUT_OFFSETS 10 /* TCS & TES only */
#define SI_SGPR_TCS_OUT_LAYOUT 11 /* TCS & TES only */
#define SI_SGPR_TCS_IN_LAYOUT 12 /* TCS only */
#define SI_SGPR_ALPHA_REF 10 /* PS only */
#define SI_VS_NUM_USER_SGPR 15 /* API VS */
#define SI_ES_NUM_USER_SGPR 14 /* API VS */
#define SI_LS_NUM_USER_SGPR 15 /* API VS */
#define SI_TCS_NUM_USER_SGPR 13
#define SI_TES_NUM_USER_SGPR 12
#define SI_GS_NUM_USER_SGPR 10
#define SI_GSCOPY_NUM_USER_SGPR 4
#define SI_PS_NUM_USER_SGPR 9
#define SI_PS_NUM_USER_SGPR 11
/* LLVM function parameter indices */
#define SI_PARAM_RW_BUFFERS 0
#define SI_PARAM_CONST_BUFFERS 1
#define SI_PARAM_SAMPLERS 2
#define SI_PARAM_IMAGES 3
#define SI_PARAM_SHADER_BUFFERS 4
/* VS only parameters */
#define SI_PARAM_VERTEX_BUFFERS 4
#define SI_PARAM_BASE_VERTEX 5
#define SI_PARAM_START_INSTANCE 6
#define SI_PARAM_VERTEX_BUFFERS 5
#define SI_PARAM_BASE_VERTEX 6
#define SI_PARAM_START_INSTANCE 7
/* [0] = clamp vertex color */
#define SI_PARAM_VS_STATE_BITS 7
#define SI_PARAM_VS_STATE_BITS 8
/* the other VS parameters are assigned dynamically */
/* Offsets where TCS outputs and TCS patch outputs live in LDS:
* [0:15] = TCS output patch0 offset / 16, max = NUM_PATCHES * 32 * 32
* [16:31] = TCS output patch0 offset for per-patch / 16, max = NUM_PATCHES*32*32 + 32*32
*/
#define SI_PARAM_TCS_OUT_OFFSETS 4 /* for TCS & TES */
#define SI_PARAM_TCS_OUT_OFFSETS 5 /* for TCS & TES */
/* Layout of TCS outputs / TES inputs:
* [0:12] = stride between output patches in dwords, num_outputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between output vertices in dwords = num_inputs * 4, max = 32*4
* [26:31] = gl_PatchVerticesIn, max = 32
*/
#define SI_PARAM_TCS_OUT_LAYOUT 5 /* for TCS & TES */
#define SI_PARAM_TCS_OUT_LAYOUT 6 /* for TCS & TES */
/* Layout of LS outputs / TCS inputs
* [0:12] = stride between patches in dwords = num_inputs * num_vertices * 4, max = 32*32*4
* [13:20] = stride between vertices in dwords = num_inputs * 4, max = 32*4
*/
#define SI_PARAM_TCS_IN_LAYOUT 6 /* TCS only */
#define SI_PARAM_LS_OUT_LAYOUT 7 /* same value as TCS_IN_LAYOUT, LS only */
#define SI_PARAM_TCS_IN_LAYOUT 7 /* TCS only */
#define SI_PARAM_LS_OUT_LAYOUT 8 /* same value as TCS_IN_LAYOUT, LS only */
/* TCS only parameters. */
#define SI_PARAM_TESS_FACTOR_OFFSET 7
#define SI_PARAM_PATCH_ID 8
#define SI_PARAM_REL_IDS 9
#define SI_PARAM_TESS_FACTOR_OFFSET 8
#define SI_PARAM_PATCH_ID 9
#define SI_PARAM_REL_IDS 10
/* GS only parameters */
#define SI_PARAM_GS2VS_OFFSET 4
#define SI_PARAM_GS_WAVE_ID 5
#define SI_PARAM_VTX0_OFFSET 6
#define SI_PARAM_VTX1_OFFSET 7
#define SI_PARAM_PRIMITIVE_ID 8
#define SI_PARAM_VTX2_OFFSET 9
#define SI_PARAM_VTX3_OFFSET 10
#define SI_PARAM_VTX4_OFFSET 11
#define SI_PARAM_VTX5_OFFSET 12
#define SI_PARAM_GS_INSTANCE_ID 13
#define SI_PARAM_GS2VS_OFFSET 5
#define SI_PARAM_GS_WAVE_ID 6
#define SI_PARAM_VTX0_OFFSET 7
#define SI_PARAM_VTX1_OFFSET 8
#define SI_PARAM_PRIMITIVE_ID 9
#define SI_PARAM_VTX2_OFFSET 10
#define SI_PARAM_VTX3_OFFSET 11
#define SI_PARAM_VTX4_OFFSET 12
#define SI_PARAM_VTX5_OFFSET 13
#define SI_PARAM_GS_INSTANCE_ID 14
/* PS only parameters */
#define SI_PARAM_ALPHA_REF 4
#define SI_PARAM_PRIM_MASK 5
#define SI_PARAM_PERSP_SAMPLE 6
#define SI_PARAM_PERSP_CENTER 7
#define SI_PARAM_PERSP_CENTROID 8
#define SI_PARAM_PERSP_PULL_MODEL 9
#define SI_PARAM_LINEAR_SAMPLE 10
#define SI_PARAM_LINEAR_CENTER 11
#define SI_PARAM_LINEAR_CENTROID 12
#define SI_PARAM_LINE_STIPPLE_TEX 13
#define SI_PARAM_POS_X_FLOAT 14
#define SI_PARAM_POS_Y_FLOAT 15
#define SI_PARAM_POS_Z_FLOAT 16
#define SI_PARAM_POS_W_FLOAT 17
#define SI_PARAM_FRONT_FACE 18
#define SI_PARAM_ANCILLARY 19
#define SI_PARAM_SAMPLE_COVERAGE 20
#define SI_PARAM_POS_FIXED_PT 21
#define SI_PARAM_ALPHA_REF 5
#define SI_PARAM_PRIM_MASK 6
#define SI_PARAM_PERSP_SAMPLE 7
#define SI_PARAM_PERSP_CENTER 8
#define SI_PARAM_PERSP_CENTROID 9
#define SI_PARAM_PERSP_PULL_MODEL 10
#define SI_PARAM_LINEAR_SAMPLE 11
#define SI_PARAM_LINEAR_CENTER 12
#define SI_PARAM_LINEAR_CENTROID 13
#define SI_PARAM_LINE_STIPPLE_TEX 14
#define SI_PARAM_POS_X_FLOAT 15
#define SI_PARAM_POS_Y_FLOAT 16
#define SI_PARAM_POS_Z_FLOAT 17
#define SI_PARAM_POS_W_FLOAT 18
#define SI_PARAM_FRONT_FACE 19
#define SI_PARAM_ANCILLARY 20
#define SI_PARAM_SAMPLE_COVERAGE 21
#define SI_PARAM_POS_FIXED_PT 22
#define SI_NUM_PARAMS (SI_PARAM_POS_FIXED_PT + 9) /* +8 for COLOR[0..1] */
......
......@@ -161,6 +161,8 @@ struct si_shader_data {
#define SI_NUM_IMAGES 16
#define SI_NUM_SHADER_BUFFERS 16
/* Read-write buffer slots.
*
* Ring buffers: 0..1
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment