[GPU] Round tessellation patch vertex count up (by @deaklajos #2007)

Also move the clamping of the guest index count to the index buffer size to the place before it's read in calculations
This commit is contained in:
Triang3l 2022-04-14 21:19:12 +03:00
parent 38aca269e1
commit ef8a60e011
1 changed files with 32 additions and 15 deletions

View File

@ -222,7 +222,8 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
// Parse the primitive type and the tessellation state (VGT_OUTPUT_PATH_CNTL
// is only used in the explicit major mode) - there are cases in games when
// this register is left over after usage of tessellation in draws that don't
// need it.
// need it. Also perform needed vertex count adjustments based on the
// primitive type.
xenos::PrimitiveType guest_primitive_type = vgt_draw_initiator.prim_type;
xenos::PrimitiveType host_primitive_type = guest_primitive_type;
bool tessellation_enabled =
@ -233,6 +234,7 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
xenos::TessellationMode tessellation_mode =
regs.Get<reg::VGT_HOS_CNTL>().tess_mode;
Shader::HostVertexShaderType host_vertex_shader_type;
uint32_t guest_draw_vertex_count = vgt_draw_initiator.num_indices;
if (tessellation_enabled) {
// Currently only supporting tessellation in known cases for safety, and not
// yet converting patch strips / fans to patch lists until games using them
@ -289,12 +291,29 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
// - 4D5307ED - water - adaptive.
host_vertex_shader_type =
Shader::HostVertexShaderType::kTriangleDomainPatchIndexed;
// See the comment about the rounding for kQuadPatch.
guest_draw_vertex_count =
xe::round_up(guest_draw_vertex_count, uint32_t(3), false);
break;
case xenos::PrimitiveType::kQuadPatch:
// - 4D5307F1 - continuous.
// - 4D5307F1 - ground - continuous.
// - 4D5307F2 - garden ground - adaptive.
host_vertex_shader_type =
Shader::HostVertexShaderType::kQuadDomainPatchIndexed;
// While it's known that num_indices represents the control point count
// (4D5307E6, for example, for water triangle patches, performs N
// invocations of the memexporting shader calculating the edge
// tessellation factors for one patch, and then draws the water with
// num_indices = 3 * N), 4D5307F1 ground is drawn with num_indices = 1
// rather than 4. Unlike Direct3D 11 tessellation, where the patch count
// is `floor(vertex count / control points per patch)`, on the Xenos,
// the count appears to be `ceil` of that value (like a
// `for (i = 0; i < num_indices; i += 4)` loop is used to emit the
// patches). It's unlikely, however, that this adjustment should also be
// done for regular primitive types with tessellation enabled, as
// they're handled as usual primitive topologies, just post-tessellated.
guest_draw_vertex_count =
xe::align(guest_draw_vertex_count, uint32_t(4));
break;
default:
// TODO(Triang3l): Support line patches.
@ -346,7 +365,17 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
}
// Process the indices.
uint32_t guest_draw_vertex_count = vgt_draw_initiator.num_indices;
auto vgt_dma_size = regs.Get<reg::VGT_DMA_SIZE>();
if (vgt_draw_initiator.source_select == xenos::SourceSelect::kDMA &&
guest_draw_vertex_count > vgt_dma_size.num_words) {
XELOGW(
"Primitive processor: {} vertices attempted to be drawn with an index "
"buffer only containing {}. Should be fetching zero indices instead of "
"overflowing ones, but this is a rare situation, so not handled yet. "
"Report the game to Xenia developers!",
guest_draw_vertex_count, vgt_dma_size.num_words);
guest_draw_vertex_count = vgt_dma_size.num_words;
}
uint32_t line_loop_closing_index = 0;
uint32_t guest_index_base;
CachedResult cacheable;
@ -420,9 +449,6 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
uint32_t(vgt_draw_initiator.source_select));
return false;
}
auto vgt_dma_size = regs.Get<reg::VGT_DMA_SIZE>();
xenos::IndexFormat guest_index_format = vgt_draw_initiator.index_size;
cacheable.host_index_format = guest_index_format;
// Normalize the endian and the reset index.
@ -490,15 +516,6 @@ bool PrimitiveProcessor::Process(ProcessingResult& result_out) {
}
// Get the index buffer memory range.
if (guest_draw_vertex_count > vgt_dma_size.num_words) {
XELOGW(
"Primitive processor: {} vertices attempted to be drawn with an "
"index buffer only containing {}. Should be fetching zero indices "
"instead of overflowing ones, but this is a rare situation, so not "
"handled yet. Report the game to Xenia developers!",
guest_draw_vertex_count, vgt_dma_size.num_words);
guest_draw_vertex_count = vgt_dma_size.num_words;
}
uint32_t index_size_log2 =
guest_index_format == xenos::IndexFormat::kInt16 ? 1 : 2;
// The base should already be aligned, but aligning here too for safety.