diff --git a/src/xenia/gpu/d3d12/d3d12_command_processor.cc b/src/xenia/gpu/d3d12/d3d12_command_processor.cc index 5d9be3660..6a3f78bc0 100644 --- a/src/xenia/gpu/d3d12/d3d12_command_processor.cc +++ b/src/xenia/gpu/d3d12/d3d12_command_processor.cc @@ -2008,46 +2008,45 @@ bool D3D12CommandProcessor::IssueDraw(xenos::PrimitiveType primitive_type, // Ensure vertex buffers are resident. // TODO(Triang3l): Cache residency for ranges in a way similar to how texture // validity is tracked. - uint64_t vertex_buffers_resident[2] = {}; - for (const Shader::VertexBinding& vertex_binding : - vertex_shader->vertex_bindings()) { - uint32_t vfetch_index = vertex_binding.fetch_constant; - if (vertex_buffers_resident[vfetch_index >> 6] & - (uint64_t(1) << (vfetch_index & 63))) { - continue; - } - const auto& vfetch_constant = regs.Get( - XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); - switch (vfetch_constant.type) { - case xenos::FetchConstantType::kVertex: - break; - case xenos::FetchConstantType::kInvalidVertex: - if (cvars::gpu_allow_invalid_fetch_constants) { + const Shader::ConstantRegisterMap& constant_map_vertex = + vertex_shader->constant_register_map(); + for (uint32_t i = 0; i < xe::countof(constant_map_vertex.vertex_fetch_bitmap); + ++i) { + uint32_t vfetch_bits_remaining = constant_map_vertex.vertex_fetch_bitmap[i]; + uint32_t j; + while (xe::bit_scan_forward(vfetch_bits_remaining, &j)) { + vfetch_bits_remaining &= ~(uint32_t(1) << j); + uint32_t vfetch_index = i * 32 + j; + const auto& vfetch_constant = regs.Get( + XE_GPU_REG_SHADER_CONSTANT_FETCH_00_0 + vfetch_index * 2); + switch (vfetch_constant.type) { + case xenos::FetchConstantType::kVertex: break; - } - XELOGW( - "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! 
" - "This " - "is incorrect behavior, but you can try bypassing this by " - "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", - vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); - return false; - default: - XELOGW( - "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", - vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + case xenos::FetchConstantType::kInvalidVertex: + if (cvars::gpu_allow_invalid_fetch_constants) { + break; + } + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) has \"invalid\" type! " + "This is incorrect behavior, but you can try bypassing this by " + "launching Xenia with --gpu_allow_invalid_fetch_constants=true.", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + default: + XELOGW( + "Vertex fetch constant {} ({:08X} {:08X}) is completely invalid!", + vfetch_index, vfetch_constant.dword_0, vfetch_constant.dword_1); + return false; + } + if (!shared_memory_->RequestRange(vfetch_constant.address << 2, + vfetch_constant.size << 2)) { + XELOGE( + "Failed to request vertex buffer at 0x{:08X} (size {}) in the " + "shared memory", + vfetch_constant.address << 2, vfetch_constant.size << 2); return false; + } } - if (!shared_memory_->RequestRange(vfetch_constant.address << 2, - vfetch_constant.size << 2)) { - XELOGE( - "Failed to request vertex buffer at 0x{:08X} (size {}) in the shared " - "memory", - vfetch_constant.address << 2, vfetch_constant.size << 2); - return false; - } - vertex_buffers_resident[vfetch_index >> 6] |= uint64_t(1) - << (vfetch_index & 63); } // Gather memexport ranges and ensure the heaps for them are resident, and diff --git a/src/xenia/gpu/dxbc_shader_translator_fetch.cc b/src/xenia/gpu/dxbc_shader_translator_fetch.cc index 63480b76c..016841bc5 100644 --- a/src/xenia/gpu/dxbc_shader_translator_fetch.cc +++ b/src/xenia/gpu/dxbc_shader_translator_fetch.cc @@ -35,7 +35,9 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( 
uint32_t used_result_components = instr.result.GetUsedResultComponents(); uint32_t needed_words = xenos::GetVertexFormatNeededWords( instr.attributes.data_format, used_result_components); - if (!needed_words) { + // If this is vfetch_full, the address may still be needed for vfetch_mini - + // don't exit before calculating the address. + if (!needed_words && instr.is_mini_fetch) { // Nothing to load - just constant 0/1 writes, or the swizzle includes only // components that don't exist in the format (writing zero instead of them). // Unpacking assumes at least some word is needed. @@ -107,6 +109,13 @@ void DxbcShaderTranslator::ProcessVertexFetchInstruction( } } + if (!needed_words) { + // The vfetch_full address has been loaded for the subsequent vfetch_mini, + // but there's no data to load. + StoreResult(instr.result, dxbc::Src::LF(0.0f)); + return; + } + dxbc::Dest address_temp_dest(dxbc::Dest::R(system_temp_result_, 0b1000)); dxbc::Src address_temp_src( dxbc::Src::R(system_temp_result_, dxbc::Src::kWWWW)); diff --git a/src/xenia/gpu/shader.h b/src/xenia/gpu/shader.h index 2ce81409a..9603134d4 100644 --- a/src/xenia/gpu/shader.h +++ b/src/xenia/gpu/shader.h @@ -435,6 +435,11 @@ struct ParsedVertexFetchInstruction { bool predicate_condition = false; // Describes how the instruction result is stored. + // Note that if the result doesn't have any components to write the fetched + // value to, the address calculation in vfetch_full must still be performed + // because such a vfetch_full may be used to set up addressing for vfetch_mini + // (wires in the color pass of 5454082B do vfetch_full to r2.000_, and then a + // true vfetch_mini). InstructionResult result; // Number of source operands. @@ -696,6 +701,9 @@ class Shader { // Bitmap of all bool constants read by the shader. // Each bit corresponds to a storage index [0-255]. uint32_t bool_bitmap[256 / 32]; + // Bitmap of all vertex fetch constants read by the shader. 
+ // Each bit corresponds to a storage index [0-95]. + uint32_t vertex_fetch_bitmap[96 / 32]; // Total number of kConstantFloat registers read by the shader. uint32_t float_count; diff --git a/src/xenia/gpu/shader_translator.cc b/src/xenia/gpu/shader_translator.cc index 612edbfbf..9c1837779 100644 --- a/src/xenia/gpu/shader_translator.cc +++ b/src/xenia/gpu/shader_translator.cc @@ -282,13 +282,19 @@ void Shader::GatherVertexFetchInformation( GatherFetchResultInformation(fetch_instr.result); - // Don't bother setting up a binding for an instruction that fetches nothing. - if (!fetch_instr.result.GetUsedResultComponents()) { - return; + // Mini-fetches inherit the operands from full fetches. + if (!fetch_instr.is_mini_fetch) { + for (size_t i = 0; i < fetch_instr.operand_count; ++i) { + GatherOperandInformation(fetch_instr.operands[i]); + } } - for (size_t i = 0; i < fetch_instr.operand_count; ++i) { - GatherOperandInformation(fetch_instr.operands[i]); + // Don't bother setting up a binding for an instruction that fetches nothing. + // In case of vfetch_full, however, it may still be used to set up addressing + // for the subsequent vfetch_mini, so operand information must still be + // gathered. + if (!fetch_instr.result.GetUsedResultComponents()) { + return; } // Try to allocate an attribute on an existing binding. @@ -434,6 +440,10 @@ void Shader::GatherOperandInformation(const InstructionOperand& operand) { constant_register_map_.float_dynamic_addressing = true; } break; + case InstructionStorageSource::kVertexFetchConstant: + constant_register_map_.vertex_fetch_bitmap[operand.storage_index >> 5] |= + uint32_t(1) << (operand.storage_index & 31); + break; default: break; }