forked from ShuriZma/suyu
1
0
Fork 0

MacroHLE: Add Index Buffer size estimation.

This commit is contained in:
Fernando Sahmkow 2022-10-21 01:46:51 +02:00
parent c541559767
commit 93ac5a6a6d
5 changed files with 74 additions and 10 deletions

View File

@ -179,6 +179,13 @@ u32 Maxwell3D::GetMaxCurrentVertices() {
return num_vertices;
}
size_t Maxwell3D::EstimateIndexBufferSize() {
GPUVAddr start_address = regs.index_buffer.StartAddress();
GPUVAddr end_address = regs.index_buffer.EndAddress();
return std::min<size_t>(memory_manager.GetMemoryLayoutSize(start_address),
static_cast<size_t>(end_address - start_address));
}
u32 Maxwell3D::ProcessShadowRam(u32 method, u32 argument) {
// Keep track of the register value in shadow_state when requested.
const auto control = shadow_state.shadow_ram_control;

View File

@ -3077,6 +3077,8 @@ public:
u32 GetMaxCurrentVertices();
/// Estimates the extent of the bound index buffer: the smaller of the span between the index
/// buffer start/end addresses and the memory layout size at its start address.
size_t EstimateIndexBufferSize();
/// Handles a write to the CLEAR_BUFFERS register.
void ProcessClearBuffers(u32 layer_count);

View File

@ -163,12 +163,16 @@ public:
maxwell3d.RefreshParameters();
minimum_limit = std::max(parameters[3], minimum_limit);
}
const u32 base_vertex = parameters[8];
const u32 base_instance = parameters[9];
maxwell3d.regs.vertex_id_base = base_vertex;
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
const u32 base_size = std::max(minimum_limit, estimate);
const u32 element_base = parameters[4];
const u32 base_instance = parameters[5];
maxwell3d.regs.index_buffer.first = 0;
maxwell3d.regs.index_buffer.count = base_size; // Use a fixed size, just for mapping
maxwell3d.regs.draw.topology.Assign(topology);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.CallMethod(0x8e3, 0x640, true);
maxwell3d.CallMethod(0x8e4, base_vertex, true);
maxwell3d.CallMethod(0x8e4, element_base, true);
maxwell3d.CallMethod(0x8e5, base_instance, true);
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true;
@ -179,7 +183,7 @@ public:
params.max_draw_counts = 1;
params.stride = 0;
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, minimum_limit);
maxwell3d.draw_manager->DrawIndexedIndirect(topology, 0, base_size);
maxwell3d.CallMethod(0x8e3, 0x640, true);
maxwell3d.CallMethod(0x8e4, 0x0, true);
maxwell3d.CallMethod(0x8e5, 0x0, true);
@ -271,9 +275,11 @@ public:
if (check_limit) {
minimum_limit = std::max(highest_limit, minimum_limit);
}
const u32 estimate = static_cast<u32>(maxwell3d.EstimateIndexBufferSize());
const u32 base_size = std::max(minimum_limit, estimate);
maxwell3d.regs.index_buffer.first = 0;
maxwell3d.regs.index_buffer.count = std::max(highest_limit, minimum_limit);
maxwell3d.regs.index_buffer.count = std::max(highest_limit, base_size);
maxwell3d.dirty.flags[VideoCommon::Dirty::IndexBuffer] = true;
auto& params = maxwell3d.draw_manager->GetIndirectParams();
params.is_indexed = true;

View File

@ -325,9 +325,15 @@ template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typenam
inline void MemoryManager::MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size,
FuncMapped&& func_mapped, FuncReserved&& func_reserved,
FuncUnmapped&& func_unmapped) const {
static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v<FuncMapped, bool>;
static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v<FuncReserved, bool>;
static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v<FuncUnmapped, bool>;
using FuncMappedReturn =
typename std::invoke_result<FuncMapped, std::size_t, std::size_t, std::size_t>::type;
using FuncReservedReturn =
typename std::invoke_result<FuncReserved, std::size_t, std::size_t, std::size_t>::type;
using FuncUnmappedReturn =
typename std::invoke_result<FuncUnmapped, std::size_t, std::size_t, std::size_t>::type;
static constexpr bool BOOL_BREAK_MAPPED = std::is_same_v<FuncMappedReturn, bool>;
static constexpr bool BOOL_BREAK_RESERVED = std::is_same_v<FuncReservedReturn, bool>;
static constexpr bool BOOL_BREAK_UNMAPPED = std::is_same_v<FuncUnmappedReturn, bool>;
u64 used_page_size;
u64 used_page_mask;
u64 used_page_bits;
@ -571,6 +577,47 @@ size_t MemoryManager::MaxContinousRange(GPUVAddr gpu_addr, size_t size) const {
return range_so_far;
}
/// Measures how much address space, starting at `gpu_addr`, keeps the PTE kind found at
/// `gpu_addr`.
///
/// The address space is walked at big-page granularity from `gpu_addr` to its end. Ranges the
/// big-page walk reports as unmapped are re-walked at small-page granularity. Every mapped chunk
/// whose kind equals the reference kind adds its size to the total; a chunk of a different kind,
/// a reserved range, or a range unmapped at small-page granularity requests a stop.
///
/// @param gpu_addr GPU virtual address where the measurement starts.
/// @return Accumulated size of the matching chunks, or 0 when the kind at `gpu_addr` is
///         PTEKind::INVALID.
///
/// NOTE(review): the callbacks return `true` to request a stop; this presumably maps onto the
/// BOOL_BREAK_* handling inside MemoryOperation -- confirm against its body.
size_t MemoryManager::GetMemoryLayoutSize(GPUVAddr gpu_addr) const {
    const PTEKind reference_kind = GetPageKind(gpu_addr);
    if (reference_kind == PTEKind::INVALID) {
        return 0;
    }

    size_t accumulated = 0;
    bool halted = false;

    // Shared decision for both page granularities: account for the chunk when its kind matches
    // the reference kind, otherwise flag the stop.
    auto consume = [&](PTEKind chunk_kind, std::size_t amount) -> bool {
        if (chunk_kind != reference_kind) {
            halted = true;
            return true;
        }
        accumulated += amount;
        return false;
    };

    // Used for every range that must terminate the measurement unconditionally.
    auto stop_walk = [&]([[maybe_unused]] std::size_t page_index,
                         [[maybe_unused]] std::size_t offset,
                         [[maybe_unused]] std::size_t copy_amount) -> bool {
        halted = true;
        return true;
    };

    // Mapped small page: compare the kind looked up through the small-page table.
    auto on_small_page = [&](std::size_t page_index, std::size_t offset,
                             std::size_t copy_amount) -> bool {
        return consume(GetKind<false>((page_index << page_bits) + offset), copy_amount);
    };

    // Mapped big page: compare the kind looked up through the big-page table.
    auto on_big_page = [&](std::size_t page_index, std::size_t offset,
                           std::size_t copy_amount) -> bool {
        return consume(GetKind<true>((page_index << big_page_bits) + offset), copy_amount);
    };

    // Range unmapped at big-page granularity: retry the same range with small pages and report
    // whether any callback has requested a stop so far.
    auto descend_to_small_pages = [&](std::size_t page_index, std::size_t offset,
                                      std::size_t copy_amount) -> bool {
        const GPUVAddr range_start = (page_index << big_page_bits) + offset;
        MemoryOperation<false>(range_start, copy_amount, on_small_page, stop_walk, stop_walk);
        return halted;
    };

    const auto remaining_space = address_space_size - gpu_addr;
    MemoryOperation<true>(gpu_addr, remaining_space, on_big_page, stop_walk,
                          descend_to_small_pages);
    return accumulated;
}
void MemoryManager::InvalidateRegion(GPUVAddr gpu_addr, size_t size) const {
auto do_nothing = [&]([[maybe_unused]] std::size_t page_index,
[[maybe_unused]] std::size_t offset,

View File

@ -118,6 +118,8 @@ public:
PTEKind GetPageKind(GPUVAddr gpu_addr) const;
/// Returns the accumulated size, starting at gpu_addr, of the mapped memory that shares the PTE
/// kind found at gpu_addr; returns 0 when that kind is PTEKind::INVALID.
size_t GetMemoryLayoutSize(GPUVAddr gpu_addr) const;
private:
template <bool is_big_pages, typename FuncMapped, typename FuncReserved, typename FuncUnmapped>
inline void MemoryOperation(GPUVAddr gpu_src_addr, std::size_t size, FuncMapped&& func_mapped,