Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
What's new
7
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
Emil Velikov
mesa
Commits
73749caf
Commit
73749caf
authored
Oct 20, 2017
by
Bas Nieuwenhuizen
Committed by
Dave Airlie
Oct 20, 2017
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
radv: calculate and emit GFX9 GS registers to pipeline state.
Reviewed-by:
Dave Airlie
<
airlied@redhat.com
>
parent
9961ae24
Changes
4
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
158 additions
and
7 deletions
+158
-7
src/amd/vulkan/radv_cmd_buffer.c
src/amd/vulkan/radv_cmd_buffer.c
+20
-5
src/amd/vulkan/radv_pipeline.c
src/amd/vulkan/radv_pipeline.c
+124
-2
src/amd/vulkan/radv_private.h
src/amd/vulkan/radv_private.h
+8
-0
src/amd/vulkan/radv_shader.c
src/amd/vulkan/radv_shader.c
+6
-0
No files found.
src/amd/vulkan/radv_cmd_buffer.c
View file @
73749caf
...
...
@@ -864,11 +864,26 @@ radv_emit_geometry_shader(struct radv_cmd_buffer *cmd_buffer,
ws
->
cs_add_buffer
(
cmd_buffer
->
cs
,
gs
->
bo
,
8
);
radv_emit_prefetch
(
cmd_buffer
,
va
,
gs
->
code_size
);
radeon_set_sh_reg_seq
(
cmd_buffer
->
cs
,
R_00B220_SPI_SHADER_PGM_LO_GS
,
4
);
radeon_emit
(
cmd_buffer
->
cs
,
va
>>
8
);
radeon_emit
(
cmd_buffer
->
cs
,
va
>>
40
);
radeon_emit
(
cmd_buffer
->
cs
,
gs
->
rsrc1
);
radeon_emit
(
cmd_buffer
->
cs
,
gs
->
rsrc2
);
if
(
cmd_buffer
->
device
->
physical_device
->
rad_info
.
chip_class
>=
GFX9
)
{
radeon_set_sh_reg_seq
(
cmd_buffer
->
cs
,
R_00B210_SPI_SHADER_PGM_LO_ES
,
2
);
radeon_emit
(
cmd_buffer
->
cs
,
va
>>
8
);
radeon_emit
(
cmd_buffer
->
cs
,
va
>>
40
);
radeon_set_sh_reg_seq
(
cmd_buffer
->
cs
,
R_00B228_SPI_SHADER_PGM_RSRC1_GS
,
2
);
radeon_emit
(
cmd_buffer
->
cs
,
gs
->
rsrc1
);
radeon_emit
(
cmd_buffer
->
cs
,
gs
->
rsrc2
|
S_00B22C_LDS_SIZE
(
pipeline
->
graphics
.
gs
.
lds_size
));
radeon_set_context_reg
(
cmd_buffer
->
cs
,
R_028A44_VGT_GS_ONCHIP_CNTL
,
pipeline
->
graphics
.
gs
.
vgt_gs_onchip_cntl
);
radeon_set_context_reg
(
cmd_buffer
->
cs
,
R_028A94_VGT_GS_MAX_PRIMS_PER_SUBGROUP
,
pipeline
->
graphics
.
gs
.
vgt_gs_max_prims_per_subgroup
);
radeon_set_context_reg
(
cmd_buffer
->
cs
,
R_028AAC_VGT_ESGS_RING_ITEMSIZE
,
pipeline
->
graphics
.
gs
.
vgt_esgs_ring_itemsize
);
}
else
{
radeon_set_sh_reg_seq
(
cmd_buffer
->
cs
,
R_00B220_SPI_SHADER_PGM_LO_GS
,
4
);
radeon_emit
(
cmd_buffer
->
cs
,
va
>>
8
);
radeon_emit
(
cmd_buffer
->
cs
,
va
>>
40
);
radeon_emit
(
cmd_buffer
->
cs
,
gs
->
rsrc1
);
radeon_emit
(
cmd_buffer
->
cs
,
gs
->
rsrc2
);
}
radv_emit_hw_vs
(
cmd_buffer
,
pipeline
,
pipeline
->
gs_copy_shader
,
&
pipeline
->
gs_copy_shader
->
info
.
vs
.
outinfo
);
...
...
src/amd/vulkan/radv_pipeline.c
View file @
73749caf
...
...
@@ -1161,6 +1161,123 @@ radv_compute_vs_key(const VkGraphicsPipelineCreateInfo *pCreateInfo, bool as_es,
return
key
;
}
static
void
calculate_gfx9_gs_info
(
const
VkGraphicsPipelineCreateInfo
*
pCreateInfo
,
struct
radv_pipeline
*
pipeline
)
{
struct
ac_shader_variant_info
*
gs_info
=
&
pipeline
->
shaders
[
MESA_SHADER_GEOMETRY
]
->
info
;
struct
ac_es_output_info
*
es_info
=
radv_pipeline_has_tess
(
pipeline
)
?
&
gs_info
->
tes
.
es_info
:
&
gs_info
->
vs
.
es_info
;
unsigned
gs_num_invocations
=
MAX2
(
gs_info
->
gs
.
invocations
,
1
);
bool
uses_adjacency
;
switch
(
pCreateInfo
->
pInputAssemblyState
->
topology
)
{
case
VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY
:
case
VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY
:
case
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY
:
case
VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY
:
uses_adjacency
=
false
;
break
;
default:
uses_adjacency
=
false
;
break
;
}
/* All these are in dwords: */
/* We can't allow using the whole LDS, because GS waves compete with
* other shader stages for LDS space. */
const
unsigned
max_lds_size
=
8
*
1024
;
const
unsigned
esgs_itemsize
=
es_info
->
esgs_itemsize
/
4
;
unsigned
esgs_lds_size
;
/* All these are per subgroup: */
const
unsigned
max_out_prims
=
32
*
1024
;
const
unsigned
max_es_verts
=
255
;
const
unsigned
ideal_gs_prims
=
64
;
unsigned
max_gs_prims
,
gs_prims
;
unsigned
min_es_verts
,
es_verts
,
worst_case_es_verts
;
assert
(
gs_num_invocations
<=
32
);
/* GL maximum */
if
(
uses_adjacency
||
gs_num_invocations
>
1
)
max_gs_prims
=
127
/
gs_num_invocations
;
else
max_gs_prims
=
255
;
/* MAX_PRIMS_PER_SUBGROUP = gs_prims * max_vert_out * gs_invocations.
* Make sure we don't go over the maximum value.
*/
if
(
gs_info
->
gs
.
vertices_out
>
0
)
{
max_gs_prims
=
MIN2
(
max_gs_prims
,
max_out_prims
/
(
gs_info
->
gs
.
vertices_out
*
gs_num_invocations
));
}
assert
(
max_gs_prims
>
0
);
/* If the primitive has adjacency, halve the number of vertices
* that will be reused in multiple primitives.
*/
min_es_verts
=
gs_info
->
gs
.
vertices_in
/
(
uses_adjacency
?
2
:
1
);
gs_prims
=
MIN2
(
ideal_gs_prims
,
max_gs_prims
);
worst_case_es_verts
=
MIN2
(
min_es_verts
*
gs_prims
,
max_es_verts
);
/* Compute ESGS LDS size based on the worst case number of ES vertices
* needed to create the target number of GS prims per subgroup.
*/
esgs_lds_size
=
esgs_itemsize
*
worst_case_es_verts
;
/* If total LDS usage is too big, refactor partitions based on ratio
* of ESGS item sizes.
*/
if
(
esgs_lds_size
>
max_lds_size
)
{
/* Our target GS Prims Per Subgroup was too large. Calculate
* the maximum number of GS Prims Per Subgroup that will fit
* into LDS, capped by the maximum that the hardware can support.
*/
gs_prims
=
MIN2
((
max_lds_size
/
(
esgs_itemsize
*
min_es_verts
)),
max_gs_prims
);
assert
(
gs_prims
>
0
);
worst_case_es_verts
=
MIN2
(
min_es_verts
*
gs_prims
,
max_es_verts
);
esgs_lds_size
=
esgs_itemsize
*
worst_case_es_verts
;
assert
(
esgs_lds_size
<=
max_lds_size
);
}
/* Now calculate remaining ESGS information. */
if
(
esgs_lds_size
)
es_verts
=
MIN2
(
esgs_lds_size
/
esgs_itemsize
,
max_es_verts
);
else
es_verts
=
max_es_verts
;
/* Vertices for adjacency primitives are not always reused, so restore
* it for ES_VERTS_PER_SUBGRP.
*/
min_es_verts
=
gs_info
->
gs
.
vertices_in
;
/* For normal primitives, the VGT only checks if they are past the ES
* verts per subgroup after allocating a full GS primitive and if they
* are, kick off a new subgroup. But if those additional ES verts are
* unique (e.g. not reused) we need to make sure there is enough LDS
* space to account for those ES verts beyond ES_VERTS_PER_SUBGRP.
*/
es_verts
-=
min_es_verts
-
1
;
uint32_t
es_verts_per_subgroup
=
es_verts
;
uint32_t
gs_prims_per_subgroup
=
gs_prims
;
uint32_t
gs_inst_prims_in_subgroup
=
gs_prims
*
gs_num_invocations
;
uint32_t
max_prims_per_subgroup
=
gs_inst_prims_in_subgroup
*
gs_info
->
gs
.
vertices_out
;
pipeline
->
graphics
.
gs
.
lds_size
=
align
(
esgs_lds_size
,
128
)
/
128
;
pipeline
->
graphics
.
gs
.
vgt_gs_onchip_cntl
=
S_028A44_ES_VERTS_PER_SUBGRP
(
es_verts_per_subgroup
)
|
S_028A44_GS_PRIMS_PER_SUBGRP
(
gs_prims_per_subgroup
)
|
S_028A44_GS_INST_PRIMS_IN_SUBGRP
(
gs_inst_prims_in_subgroup
);
pipeline
->
graphics
.
gs
.
vgt_gs_max_prims_per_subgroup
=
S_028A94_MAX_PRIMS_PER_SUBGROUP
(
max_prims_per_subgroup
);
pipeline
->
graphics
.
gs
.
vgt_esgs_ring_itemsize
=
esgs_itemsize
;
assert
(
max_prims_per_subgroup
<=
max_out_prims
);
}
static
void
calculate_gs_ring_sizes
(
struct
radv_pipeline
*
pipeline
)
{
...
...
@@ -1194,7 +1311,9 @@ calculate_gs_ring_sizes(struct radv_pipeline *pipeline)
esgs_ring_size
=
align
(
esgs_ring_size
,
alignment
);
gsvs_ring_size
=
align
(
gsvs_ring_size
,
alignment
);
pipeline
->
graphics
.
esgs_ring_size
=
CLAMP
(
esgs_ring_size
,
min_esgs_ring_size
,
max_size
);
if
(
pipeline
->
device
->
physical_device
->
rad_info
.
chip_class
<=
VI
)
pipeline
->
graphics
.
esgs_ring_size
=
CLAMP
(
esgs_ring_size
,
min_esgs_ring_size
,
max_size
);
pipeline
->
graphics
.
gsvs_ring_size
=
MIN2
(
gsvs_ring_size
,
max_size
);
}
...
...
@@ -1916,8 +2035,11 @@ radv_pipeline_init(struct radv_pipeline *pipeline,
pipeline
->
graphics
.
vgt_shader_stages_en
=
stages
;
if
(
radv_pipeline_has_gs
(
pipeline
))
if
(
radv_pipeline_has_gs
(
pipeline
))
{
calculate_gs_ring_sizes
(
pipeline
);
if
(
device
->
physical_device
->
rad_info
.
chip_class
>=
GFX9
)
calculate_gfx9_gs_info
(
pCreateInfo
,
pipeline
);
}
if
(
radv_pipeline_has_tess
(
pipeline
))
{
if
(
pipeline
->
graphics
.
prim
==
V_008958_DI_PT_PATCH
)
{
...
...
src/amd/vulkan/radv_private.h
View file @
73749caf
...
...
@@ -1050,6 +1050,13 @@ struct radv_tessellation_state {
uint32_t
tf_param
;
};
struct
radv_gs_state
{
uint32_t
vgt_gs_onchip_cntl
;
uint32_t
vgt_gs_max_prims_per_subgroup
;
uint32_t
vgt_esgs_ring_itemsize
;
uint32_t
lds_size
;
};
struct
radv_vertex_elements_info
{
uint32_t
rsrc_word3
[
MAX_VERTEX_ATTRIBS
];
uint32_t
format_size
[
MAX_VERTEX_ATTRIBS
];
...
...
@@ -1084,6 +1091,7 @@ struct radv_pipeline {
struct
radv_raster_state
raster
;
struct
radv_multisample_state
ms
;
struct
radv_tessellation_state
tess
;
struct
radv_gs_state
gs
;
uint32_t
db_shader_control
;
uint32_t
shader_z_format
;
unsigned
prim
;
...
...
src/amd/vulkan/radv_shader.c
View file @
73749caf
...
...
@@ -400,6 +400,12 @@ radv_fill_shader_variant(struct radv_device *device,
}
if
(
device
->
physical_device
->
rad_info
.
chip_class
>=
GFX9
&&
stage
==
MESA_SHADER_GEOMETRY
)
{
/* TODO: Figure out how many we actually need. */
variant
->
rsrc1
|=
S_00B228_GS_VGPR_COMP_CNT
(
3
);
variant
->
rsrc2
|=
S_00B22C_ES_VGPR_COMP_CNT
(
3
)
|
S_00B22C_OC_LDS_EN
(
1
);
}
else
if
(
device
->
physical_device
->
rad_info
.
chip_class
>=
GFX9
&&
stage
==
MESA_SHADER_TESS_CTRL
)
variant
->
rsrc1
|=
S_00B428_LS_VGPR_COMP_CNT
(
vgpr_comp_cnt
);
else
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment