forked from ShuriZma/suyu
1
0
Fork 0

Query Cache: Fix guest side sample counting

This commit is contained in:
Fernando Sahmkow 2023-08-19 21:49:38 +02:00
parent 282ae8fa51
commit 2fea1b8407
5 changed files with 97 additions and 46 deletions

View File

@ -586,12 +586,6 @@ void Maxwell3D::ProcessQueryCondition() {
} }
void Maxwell3D::ProcessCounterReset() { void Maxwell3D::ProcessCounterReset() {
#if ANDROID
if (!Settings::IsGPULevelHigh()) {
// This is problematic on Android, disable on GPU Normal.
return;
}
#endif
switch (regs.clear_report_value) { switch (regs.clear_report_value) {
case Regs::ClearReport::ZPassPixelCount: case Regs::ClearReport::ZPassPixelCount:
rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64); rasterizer->ResetCounter(VideoCommon::QueryType::ZPassPixelCount64);

View File

@ -18,7 +18,6 @@ enum class QueryFlagBits : u32 {
IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified. IsInvalidated = 1 << 6, ///< Indicates the value of the query has been nullified.
IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query. IsOrphan = 1 << 7, ///< Indicates the query has not been set by a guest query.
IsFence = 1 << 8, ///< Indicates the query is a fence. IsFence = 1 << 8, ///< Indicates the query is a fence.
IsQueuedForAsyncFlush = 1 << 9, ///< Indicates that the query can be flushed at any moment
}; };
DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits) DECLARE_ENUM_FLAG_OPERATORS(QueryFlagBits)

View File

@ -256,8 +256,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
u8* pointer = impl->cpu_memory.GetPointer(cpu_addr); u8* pointer = impl->cpu_memory.GetPointer(cpu_addr);
u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8); u8* pointer_timestamp = impl->cpu_memory.GetPointer(cpu_addr + 8);
bool is_synced = !Settings::IsGPULevelHigh() && is_fence; bool is_synced = !Settings::IsGPULevelHigh() && is_fence;
std::function<void()> operation( std::function<void()> operation([this, is_synced, streamer, query_base = query, query_location,
[this, is_synced, query_base = query, query_location, pointer, pointer_timestamp] { pointer, pointer_timestamp] {
if (True(query_base->flags & QueryFlagBits::IsInvalidated)) { if (True(query_base->flags & QueryFlagBits::IsInvalidated)) {
if (!is_synced) [[likely]] { if (!is_synced) [[likely]] {
impl->pending_unregister.push_back(query_location); impl->pending_unregister.push_back(query_location);
@ -268,6 +268,8 @@ void QueryCacheBase<Traits>::CounterReport(GPUVAddr addr, QueryType counter_type
UNREACHABLE(); UNREACHABLE();
return; return;
} }
query_base->value += streamer->GetAmmendValue();
streamer->SetAccumulationValue(query_base->value);
if (True(query_base->flags & QueryFlagBits::HasTimestamp)) { if (True(query_base->flags & QueryFlagBits::HasTimestamp)) {
u64 timestamp = impl->gpu.GetTicks(); u64 timestamp = impl->gpu.GetTicks();
std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp)); std::memcpy(pointer_timestamp, &timestamp, sizeof(timestamp));
@ -354,9 +356,9 @@ void QueryCacheBase<Traits>::NotifySegment(bool resume) {
if (resume) { if (resume) {
impl->runtime.ResumeHostConditionalRendering(); impl->runtime.ResumeHostConditionalRendering();
} else { } else {
impl->runtime.PauseHostConditionalRendering();
CounterClose(VideoCommon::QueryType::ZPassPixelCount64); CounterClose(VideoCommon::QueryType::ZPassPixelCount64);
CounterClose(VideoCommon::QueryType::StreamingByteCount); CounterClose(VideoCommon::QueryType::StreamingByteCount);
impl->runtime.PauseHostConditionalRendering();
} }
} }

View File

@ -78,6 +78,14 @@ public:
return dependence_mask; return dependence_mask;
} }
/// Returns the amend value currently stored in this streamer (`ammend_value`).
/// Callers add this value on top of a query's own value before reporting it.
/// NOTE(review): "Ammend" is a misspelling of "amend"; the name is kept because callers depend on it.
u64 GetAmmendValue() const {
return ammend_value;
}
/// Stores `new_value` as this streamer's accumulation value (`acumulation_value`),
/// overwriting whatever was recorded before.
/// @param new_value The value to record; callers pass a query's value after the amend value was added.
void SetAccumulationValue(u64 new_value) {
acumulation_value = new_value;
}
protected: protected:
void MakeDependent(StreamerInterface* depend_on) { void MakeDependent(StreamerInterface* depend_on) {
dependence_mask |= 1ULL << depend_on->id; dependence_mask |= 1ULL << depend_on->id;
@ -87,6 +95,8 @@ protected:
const size_t id; const size_t id;
u64 dependence_mask; u64 dependence_mask;
u64 dependent_mask; u64 dependent_mask;
u64 ammend_value{};
u64 acumulation_value{};
}; };
template <typename QueryType> template <typename QueryType>

View File

@ -110,13 +110,16 @@ struct HostSyncValues {
class SamplesStreamer : public BaseStreamer { class SamplesStreamer : public BaseStreamer {
public: public:
explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_, const Device& device_, explicit SamplesStreamer(size_t id_, QueryCacheRuntime& runtime_,
VideoCore::RasterizerInterface* rasterizer_, const Device& device_,
Scheduler& scheduler_, const MemoryAllocator& memory_allocator_) Scheduler& scheduler_, const MemoryAllocator& memory_allocator_)
: BaseStreamer(id_), runtime{runtime_}, device{device_}, scheduler{scheduler_}, : BaseStreamer(id_), runtime{runtime_}, rasterizer{rasterizer_}, device{device_},
memory_allocator{memory_allocator_} { scheduler{scheduler_}, memory_allocator{memory_allocator_} {
BuildResolveBuffer(); BuildResolveBuffer();
current_bank = nullptr; current_bank = nullptr;
current_query = nullptr; current_query = nullptr;
ammend_value = 0;
acumulation_value = 0;
} }
~SamplesStreamer() = default; ~SamplesStreamer() = default;
@ -151,6 +154,11 @@ public:
PauseCounter(); PauseCounter();
} }
AbandonCurrentQuery(); AbandonCurrentQuery();
std::function<void()> func([this, counts = pending_flush_queries.size()] {
ammend_value = 0;
acumulation_value = 0;
});
rasterizer->SyncOperation(std::move(func));
} }
void CloseCounter() override { void CloseCounter() override {
@ -244,7 +252,7 @@ public:
} }
if (query->size_slots > 1) { if (query->size_slots > 1) {
// This is problematic. // This is problematic.
UNIMPLEMENTED(); // UNIMPLEMENTED();
} }
query->flags |= VideoCommon::QueryFlagBits::IsHostSynced; query->flags |= VideoCommon::QueryFlagBits::IsHostSynced;
auto loc_data = offsets[query->start_bank_id]; auto loc_data = offsets[query->start_bank_id];
@ -255,16 +263,20 @@ public:
}); });
} }
ReplicateCurrentQueryIfNeeded();
std::function<void()> func([this] { ammend_value = acumulation_value; });
rasterizer->SyncOperation(std::move(func));
AbandonCurrentQuery(); AbandonCurrentQuery();
pending_sync.clear(); pending_sync.clear();
} }
size_t WriteCounter(VAddr address, bool has_timestamp, u32 value, size_t WriteCounter(VAddr address, bool has_timestamp, u32 value,
[[maybe_unused]] std::optional<u32> subreport) override { [[maybe_unused]] std::optional<u32> subreport) override {
PauseCounter();
auto index = BuildQuery(); auto index = BuildQuery();
auto* new_query = GetQuery(index); auto* new_query = GetQuery(index);
new_query->guest_address = address; new_query->guest_address = address;
new_query->value = 100; new_query->value = 0;
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan; new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
if (has_timestamp) { if (has_timestamp) {
new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp; new_query->flags |= VideoCommon::QueryFlagBits::HasTimestamp;
@ -291,6 +303,7 @@ public:
void PushUnsyncedQueries() override { void PushUnsyncedQueries() override {
PauseCounter(); PauseCounter();
current_bank->Close();
{ {
std::scoped_lock lk(flush_guard); std::scoped_lock lk(flush_guard);
pending_flush_sets.emplace_back(std::move(pending_flush_queries)); pending_flush_sets.emplace_back(std::move(pending_flush_queries));
@ -429,6 +442,34 @@ private:
current_query_id = 0; current_query_id = 0;
} }
void ReplicateCurrentQueryIfNeeded() {
if (pending_sync.empty()) {
return;
}
if (!current_query) {
return;
}
auto index = BuildQuery();
auto* new_query = GetQuery(index);
new_query->guest_address = 0;
new_query->value = 0;
new_query->flags &= ~VideoCommon::QueryFlagBits::IsOrphan;
new_query->start_bank_id = current_query->start_bank_id;
new_query->size_banks = current_query->size_banks;
new_query->start_slot = current_query->start_slot;
new_query->size_slots = current_query->size_slots;
ApplyBankOp(new_query, [](SamplesQueryBank* bank, size_t start, size_t amount) {
bank->AddReference(amount);
});
pending_flush_queries.push_back(index);
std::function<void()> func([this, index] {
auto* query = GetQuery(index);
query->value += GetAmmendValue();
SetAccumulationValue(query->value);
Free(index);
});
}
void BuildResolveBuffer() { void BuildResolveBuffer() {
const VkBufferCreateInfo buffer_ci = { const VkBufferCreateInfo buffer_ci = {
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO, .sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
@ -448,6 +489,7 @@ private:
static constexpr size_t resolve_slots = 8; static constexpr size_t resolve_slots = 8;
QueryCacheRuntime& runtime; QueryCacheRuntime& runtime;
VideoCore::RasterizerInterface* rasterizer;
const Device& device; const Device& device;
Scheduler& scheduler; Scheduler& scheduler;
const MemoryAllocator& memory_allocator; const MemoryAllocator& memory_allocator;
@ -470,6 +512,7 @@ private:
size_t current_query_id; size_t current_query_id;
VideoCommon::HostQueryBase* current_query; VideoCommon::HostQueryBase* current_query;
bool has_started{}; bool has_started{};
bool current_unset{};
std::mutex flush_guard; std::mutex flush_guard;
}; };
@ -677,7 +720,6 @@ public:
size_t offset_base = staging_ref.offset; size_t offset_base = staging_ref.offset;
for (auto q : pending_flush_queries) { for (auto q : pending_flush_queries) {
auto* query = GetQuery(q); auto* query = GetQuery(q);
query->flags |= VideoCommon::QueryFlagBits::IsQueuedForAsyncFlush;
auto& bank = bank_pool.GetBank(query->start_bank_id); auto& bank = bank_pool.GetBank(query->start_bank_id);
bank.Sync(staging_ref, offset_base, query->start_slot, 1); bank.Sync(staging_ref, offset_base, query->start_slot, 1);
offset_base += TFBQueryBank::QUERY_SIZE; offset_base += TFBQueryBank::QUERY_SIZE;
@ -1047,8 +1089,8 @@ struct QueryCacheRuntimeImpl {
buffer_cache{buffer_cache_}, device{device_}, buffer_cache{buffer_cache_}, device{device_},
memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_}, memory_allocator{memory_allocator_}, scheduler{scheduler_}, staging_pool{staging_pool_},
guest_streamer(0, runtime), guest_streamer(0, runtime),
sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, device, sample_streamer(static_cast<size_t>(QueryType::ZPassPixelCount64), runtime, rasterizer,
scheduler, memory_allocator), device, scheduler, memory_allocator),
tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device, tfb_streamer(static_cast<size_t>(QueryType::StreamingByteCount), runtime, device,
scheduler, memory_allocator, staging_pool), scheduler, memory_allocator, staging_pool),
primitives_succeeded_streamer( primitives_succeeded_streamer(
@ -1277,6 +1319,10 @@ bool QueryCacheRuntime::HostConditionalRenderingCompareValues(VideoCommon::Looku
return true; return true;
} }
} }
if (!is_in_bc[0] && !is_in_bc[1]) {
// Both queries are in query cache, it's best to just flush.
return false;
}
HostConditionalRenderingCompareBCImpl(object_1.address, equal_check); HostConditionalRenderingCompareBCImpl(object_1.address, equal_check);
return true; return true;
} }