[GPU] vfetch_full fetching nothing still must calculate the address

This commit is contained in:
Triang3l 2022-01-09 16:26:05 +03:00
parent d6188c5d7e
commit 14b69fdb00
4 changed files with 69 additions and 43 deletions

View File

@ -2008,46 +2008,45 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type,
// Ensure vertex buffers are resident. // Ensure vertex buffers are resident.
// TODO(Triang3l): Cache residency for ranges in a way similar to how texture // TODO(Triang3l): Cache residency for ranges in a way similar to how texture
// validity is tracked. // validity is tracked.
uint64_t vertex_buffers_resident[2] = {}; const Shader::ConstantRegisterMap& constant_map_vertex =
for (const Shader::VertexBinding& vertex_binding : vertex_shader->constant_register_map();
vertex_shader->vertex_bindings()) { for (uint32_t i = 0; i < xe::countof(constant_map_vertex.vertex_fetch_bitmap);
uint32_t vfetch_index = vertex_binding.fetch_constant; ++i) {
if (vertex_buffers_resident[vfetch_index >> 6] & uint32_t vfetch_bits_remaining = constant_map_vertex.vertex_fetch_bitmap[i];
(uint64_t(1) << (vfetch_index & 63))) { uint32_t j;
continue; while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) {
} vfetch_bits_remaining &= ~(uint32_t(1) << j);
const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>( uint32_t vfetch_index = i * 32 + j;
XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); const auto& vfetch_constant = regs.Get<xenos::xe_gpu_vertex_fetch_t>(
switch (vfetch_constant.type) { XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2);
case xenos::FetchConstantType::kVertex: switch (vfetch_constant.type) {
break; case xenos::FetchConstantType::kVertex:
case xenos::FetchConstantType::kInvalidVertex:
if (cvars::gpu_allow_invalid_fetch_constants) {
break; break;
} case xenos::FetchConstantType::kInvalidVertex:
XELOGW( if (cvars::gpu_allow_invalid_fetch_constants) {
"Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " break;
"This " }
"is incorrect behavior, but you can try bypassing this by " XELOGW(
"launching Xenia with --gpu_allow_invalid_fetch_constants=true.", "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! "
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); "This is incorrect behavior, but you can try bypassing this by "
return false; "launching Xenia with --gpu_allow_invalid_fetch_constants=true.",
default: vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
XELOGW( return false;
"Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", default:
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); XELOGW(
"Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!",
vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1);
return false;
}
if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
vfetch_constant.size << 2)) {
XELOGE(
"Failed to request vertex buffer at 0x{:08X} (size {}) in the "
"shared memory",
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false; return false;
}
} }
if (!shared_memory_->RequestRange(vfetch_constant.address << 2,
vfetch_constant.size << 2)) {
XELOGE(
"Failed to request vertex buffer at 0x{:08X} (size {}) in the shared "
"memory",
vfetch_constant.address << 2, vfetch_constant.size << 2);
return false;
}
vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1)
<< (vfetch_index & 63);
} }
// Gather memexport ranges and ensure the heaps for them are resident, and // Gather memexport ranges and ensure the heaps for them are resident, and

View File

@ -35,7 +35,9 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
uint32_t used_result_components = instr.result.GetUsedResultComponents(); uint32_t used_result_components = instr.result.GetUsedResultComponents();
uint32_t needed_words = xenos::GetVertexFormatNeededWords( uint32_t needed_words = xenos::GetVertexFormatNeededWords(
instr.attributes.data_format, used_result_components); instr.attributes.data_format, used_result_components);
if (!needed_words) { // If this is vfetch_full, the address may still be needed for vfetch_mini -
// don't exit before calculating the address.
if (!needed_words && instr.is_mini_fetch) {
// Nothing to load - just constant 0/1 writes, or the swizzle includes only // Nothing to load - just constant 0/1 writes, or the swizzle includes only
// components that don't exist in the format (writing zero instead of them). // components that don't exist in the format (writing zero instead of them).
// Unpacking assumes at least some word is needed. // Unpacking assumes at least some word is needed.
@ -107,6 +109,13 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction(
} }
} }
if (!needed_words) {
// The vfetch_full address has been loaded for the subsequent vfetch_mini,
// but there's no data to load.
StoreResult(instr.result, dxbc::Src::LF(0.0f));
return;
}
dxbc::Dest address_temp_dest(dxbc::Dest::R(system_temp_result_, 0b1000)); dxbc::Dest address_temp_dest(dxbc::Dest::R(system_temp_result_, 0b1000));
dxbc::Src address_temp_src( dxbc::Src address_temp_src(
dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW));

View File

@ -435,6 +435,11 @@ struct ParsedVertexFetchInstruction {
bool predicate_condition = false; bool predicate_condition = false;
// Describes how the instruction result is stored. // Describes how the instruction result is stored.
// Note that if the result doesn't have any components to write the fetched
// value to, the address calculation in vfetch_full must still be performed
// because such a vfetch_full may be used to set up addressing for
// vfetch_mini (wires in the color pass of 5454082B do vfetch_full to
// r2.000_, and then a true vfetch_mini).
InstructionResult result; InstructionResult result;
// Number of source operands. // Number of source operands.
@ -696,6 +701,9 @@ class Shader {
// Bitmap of all bool constants read by the shader. // Bitmap of all bool constants read by the shader.
// Each bit corresponds to a storage index [0-255]. // Each bit corresponds to a storage index [0-255].
uint32_t bool_bitmap[256 / 32]; uint32_t bool_bitmap[256 / 32];
// Bitmap of all vertex fetch constants read by the shader.
// Each bit corresponds to a storage index [0-95].
uint32_t vertex_fetch_bitmap[96 / 32];
// Total number of kConstantFloat registers read by the shader. // Total number of kConstantFloat registers read by the shader.
uint32_t float_count; uint32_t float_count;

View File

@ -282,13 +282,19 @@ void Shader::GatherVertexFetchInformation(
GatherFetchResultInformation(fetch_instr.result); GatherFetchResultInformation(fetch_instr.result);
// Don't bother setting up a binding for an instruction that fetches nothing. // Mini-fetches inherit the operands from full fetches.
if (!fetch_instr.result.GetUsedResultComponents()) { if (!fetch_instr.is_mini_fetch) {
return; for (size_t i = 0; i < fetch_instr.operand_count; ++i) {
GatherOperandInformation(fetch_instr.operands[i]);
}
} }
for (size_t i = 0; i < fetch_instr.operand_count; ++i) { // Don't bother setting up a binding for an instruction that fetches nothing.
GatherOperandInformation(fetch_instr.operands[i]); // In case of vfetch_full, however, it may still be used to set up addressing
// for the subsequent vfetch_mini, so operand information must still be
// gathered.
if (!fetch_instr.result.GetUsedResultComponents()) {
return;
} }
// Try to allocate an attribute on an existing binding. // Try to allocate an attribute on an existing binding.
@ -434,6 +440,10 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) {
constant_register_map_.float_dynamic_addressing = true; constant_register_map_.float_dynamic_addressing = true;
} }
break; break;
case InstructionStorageSource::kVertexFetchConstant:
constant_register_map_.vertex_fetch_bitmap[operand.storage_index >> 5] |=
uint32_t(1) << (operand.storage_index & 31);
break;
default: default:
break; break;
} }