mirror of https://git.suyu.dev/suyu/suyu
Merge pull request #3409 from ReinUsesLisp/host-queries
query_cache: Implement a query cache and query 21 (samples passed)
This commit is contained in:
commit
93acfbd3a5
|
@ -37,6 +37,7 @@ add_library(video_core STATIC
|
||||||
memory_manager.h
|
memory_manager.h
|
||||||
morton.cpp
|
morton.cpp
|
||||||
morton.h
|
morton.h
|
||||||
|
query_cache.h
|
||||||
rasterizer_accelerated.cpp
|
rasterizer_accelerated.cpp
|
||||||
rasterizer_accelerated.h
|
rasterizer_accelerated.h
|
||||||
rasterizer_cache.cpp
|
rasterizer_cache.cpp
|
||||||
|
@ -74,6 +75,8 @@ add_library(video_core STATIC
|
||||||
renderer_opengl/gl_stream_buffer.h
|
renderer_opengl/gl_stream_buffer.h
|
||||||
renderer_opengl/gl_texture_cache.cpp
|
renderer_opengl/gl_texture_cache.cpp
|
||||||
renderer_opengl/gl_texture_cache.h
|
renderer_opengl/gl_texture_cache.h
|
||||||
|
renderer_opengl/gl_query_cache.cpp
|
||||||
|
renderer_opengl/gl_query_cache.h
|
||||||
renderer_opengl/maxwell_to_gl.h
|
renderer_opengl/maxwell_to_gl.h
|
||||||
renderer_opengl/renderer_opengl.cpp
|
renderer_opengl/renderer_opengl.cpp
|
||||||
renderer_opengl/renderer_opengl.h
|
renderer_opengl/renderer_opengl.h
|
||||||
|
@ -177,6 +180,8 @@ if (ENABLE_VULKAN)
|
||||||
renderer_vulkan/vk_memory_manager.h
|
renderer_vulkan/vk_memory_manager.h
|
||||||
renderer_vulkan/vk_pipeline_cache.cpp
|
renderer_vulkan/vk_pipeline_cache.cpp
|
||||||
renderer_vulkan/vk_pipeline_cache.h
|
renderer_vulkan/vk_pipeline_cache.h
|
||||||
|
renderer_vulkan/vk_query_cache.cpp
|
||||||
|
renderer_vulkan/vk_query_cache.h
|
||||||
renderer_vulkan/vk_rasterizer.cpp
|
renderer_vulkan/vk_rasterizer.cpp
|
||||||
renderer_vulkan/vk_rasterizer.h
|
renderer_vulkan/vk_rasterizer.h
|
||||||
renderer_vulkan/vk_renderpass_cache.cpp
|
renderer_vulkan/vk_renderpass_cache.cpp
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#include <cinttypes>
|
#include <cinttypes>
|
||||||
#include <cstring>
|
#include <cstring>
|
||||||
|
#include <optional>
|
||||||
#include "common/assert.h"
|
#include "common/assert.h"
|
||||||
#include "core/core.h"
|
#include "core/core.h"
|
||||||
#include "core/core_timing.h"
|
#include "core/core_timing.h"
|
||||||
|
@ -16,6 +17,8 @@
|
||||||
|
|
||||||
namespace Tegra::Engines {
|
namespace Tegra::Engines {
|
||||||
|
|
||||||
|
using VideoCore::QueryType;
|
||||||
|
|
||||||
/// First register id that is actually a Macro call.
|
/// First register id that is actually a Macro call.
|
||||||
constexpr u32 MacroRegistersStart = 0xE00;
|
constexpr u32 MacroRegistersStart = 0xE00;
|
||||||
|
|
||||||
|
@ -400,6 +403,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||||
ProcessQueryCondition();
|
ProcessQueryCondition();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case MAXWELL3D_REG_INDEX(counter_reset): {
|
||||||
|
ProcessCounterReset();
|
||||||
|
break;
|
||||||
|
}
|
||||||
case MAXWELL3D_REG_INDEX(sync_info): {
|
case MAXWELL3D_REG_INDEX(sync_info): {
|
||||||
ProcessSyncPoint();
|
ProcessSyncPoint();
|
||||||
break;
|
break;
|
||||||
|
@ -544,40 +551,28 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
"Units other than CROP are unimplemented");
|
"Units other than CROP are unimplemented");
|
||||||
|
|
||||||
switch (regs.query.query_get.operation) {
|
switch (regs.query.query_get.operation) {
|
||||||
case Regs::QueryOperation::Release: {
|
case Regs::QueryOperation::Release:
|
||||||
const u64 result = regs.query.query_sequence;
|
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
|
||||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
|
||||||
break;
|
break;
|
||||||
}
|
case Regs::QueryOperation::Acquire:
|
||||||
case Regs::QueryOperation::Acquire: {
|
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
|
||||||
// Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
|
// matches the current payload.
|
||||||
// to write a value that matches the current payload.
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
||||||
break;
|
break;
|
||||||
|
case Regs::QueryOperation::Counter:
|
||||||
|
if (const std::optional<u64> result = GetQueryResult()) {
|
||||||
|
// If the query returns an empty optional it means it's cached and deferred.
|
||||||
|
// In this case we have a non-empty result, so we stamp it immediately.
|
||||||
|
StampQueryResult(*result, regs.query.query_get.short_query == 0);
|
||||||
}
|
}
|
||||||
case Regs::QueryOperation::Counter: {
|
|
||||||
u64 result{};
|
|
||||||
switch (regs.query.query_get.select) {
|
|
||||||
case Regs::QuerySelect::Zero:
|
|
||||||
result = 0;
|
|
||||||
break;
|
break;
|
||||||
default:
|
case Regs::QueryOperation::Trap:
|
||||||
result = 1;
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
|
||||||
static_cast<u32>(regs.query.query_get.select.Value()));
|
|
||||||
}
|
|
||||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
case Regs::QueryOperation::Trap: {
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
||||||
break;
|
break;
|
||||||
}
|
default:
|
||||||
default: {
|
|
||||||
UNIMPLEMENTED_MSG("Unknown query operation");
|
UNIMPLEMENTED_MSG("Unknown query operation");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessQueryCondition() {
|
void Maxwell3D::ProcessQueryCondition() {
|
||||||
|
@ -593,20 +588,20 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||||
}
|
}
|
||||||
case Regs::ConditionMode::ResNonZero: {
|
case Regs::ConditionMode::ResNonZero: {
|
||||||
Regs::QueryCompare cmp;
|
Regs::QueryCompare cmp;
|
||||||
memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
|
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||||
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
|
execute_on = cmp.initial_sequence != 0U && cmp.initial_mode != 0U;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Regs::ConditionMode::Equal: {
|
case Regs::ConditionMode::Equal: {
|
||||||
Regs::QueryCompare cmp;
|
Regs::QueryCompare cmp;
|
||||||
memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
|
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||||
execute_on =
|
execute_on =
|
||||||
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
|
cmp.initial_sequence == cmp.current_sequence && cmp.initial_mode == cmp.current_mode;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Regs::ConditionMode::NotEqual: {
|
case Regs::ConditionMode::NotEqual: {
|
||||||
Regs::QueryCompare cmp;
|
Regs::QueryCompare cmp;
|
||||||
memory_manager.ReadBlockUnsafe(condition_address, &cmp, sizeof(cmp));
|
memory_manager.ReadBlock(condition_address, &cmp, sizeof(cmp));
|
||||||
execute_on =
|
execute_on =
|
||||||
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
|
cmp.initial_sequence != cmp.current_sequence || cmp.initial_mode != cmp.current_mode;
|
||||||
break;
|
break;
|
||||||
|
@ -619,6 +614,18 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Maxwell3D::ProcessCounterReset() {
|
||||||
|
switch (regs.counter_reset) {
|
||||||
|
case Regs::CounterReset::SampleCnt:
|
||||||
|
rasterizer.ResetCounter(QueryType::SamplesPassed);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
LOG_WARNING(Render_OpenGL, "Unimplemented counter reset={}",
|
||||||
|
static_cast<int>(regs.counter_reset));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessSyncPoint() {
|
void Maxwell3D::ProcessSyncPoint() {
|
||||||
const u32 sync_point = regs.sync_info.sync_point.Value();
|
const u32 sync_point = regs.sync_info.sync_point.Value();
|
||||||
const u32 increment = regs.sync_info.increment.Value();
|
const u32 increment = regs.sync_info.increment.Value();
|
||||||
|
@ -661,6 +668,22 @@ void Maxwell3D::DrawArrays() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
std::optional<u64> Maxwell3D::GetQueryResult() {
|
||||||
|
switch (regs.query.query_get.select) {
|
||||||
|
case Regs::QuerySelect::Zero:
|
||||||
|
return 0;
|
||||||
|
case Regs::QuerySelect::SamplesPassed:
|
||||||
|
// Deferred.
|
||||||
|
rasterizer.Query(regs.query.QueryAddress(), VideoCore::QueryType::SamplesPassed,
|
||||||
|
system.GPU().GetTicks());
|
||||||
|
return {};
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
||||||
|
static_cast<u32>(regs.query.query_get.select.Value()));
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
|
void Maxwell3D::ProcessCBBind(std::size_t stage_index) {
|
||||||
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
// Bind the buffer currently in CB_ADDRESS to the specified index in the desired shader stage.
|
||||||
auto& shader = state.shader_stages[stage_index];
|
auto& shader = state.shader_stages[stage_index];
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <array>
|
#include <array>
|
||||||
#include <bitset>
|
#include <bitset>
|
||||||
|
#include <optional>
|
||||||
#include <type_traits>
|
#include <type_traits>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
@ -409,6 +410,27 @@ public:
|
||||||
Linear = 1,
|
Linear = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Values written to the counter reset register; each one selects a counter to reset.
/// Only SampleCnt is handled by Maxwell3D::ProcessCounterReset, the rest are logged as
/// unimplemented.
enum class CounterReset : u32 {
    SampleCnt = 0x01,
    Unk02 = 0x02,
    Unk03 = 0x03,
    Unk04 = 0x04,
    EmittedPrimitives = 0x10, // Not tested
    Unk11 = 0x11,
    Unk12 = 0x12,
    Unk13 = 0x13,
    Unk15 = 0x15,
    Unk16 = 0x16,
    Unk17 = 0x17,
    Unk18 = 0x18,
    Unk1A = 0x1A,
    Unk1B = 0x1B,
    Unk1C = 0x1C,
    Unk1D = 0x1D,
    Unk1E = 0x1E,
    GeneratedPrimitives = 0x1F,
};
|
||||||
|
|
||||||
struct Cull {
|
struct Cull {
|
||||||
enum class FrontFace : u32 {
|
enum class FrontFace : u32 {
|
||||||
ClockWise = 0x0900,
|
ClockWise = 0x0900,
|
||||||
|
@ -857,7 +879,7 @@ public:
|
||||||
BitField<7, 1, u32> c7;
|
BitField<7, 1, u32> c7;
|
||||||
} clip_distance_enabled;
|
} clip_distance_enabled;
|
||||||
|
|
||||||
INSERT_UNION_PADDING_WORDS(0x1);
|
u32 samplecnt_enable;
|
||||||
|
|
||||||
float point_size;
|
float point_size;
|
||||||
|
|
||||||
|
@ -865,7 +887,11 @@ public:
|
||||||
|
|
||||||
u32 point_sprite_enable;
|
u32 point_sprite_enable;
|
||||||
|
|
||||||
INSERT_UNION_PADDING_WORDS(0x5);
|
INSERT_UNION_PADDING_WORDS(0x3);
|
||||||
|
|
||||||
|
CounterReset counter_reset;
|
||||||
|
|
||||||
|
INSERT_UNION_PADDING_WORDS(0x1);
|
||||||
|
|
||||||
u32 zeta_enable;
|
u32 zeta_enable;
|
||||||
|
|
||||||
|
@ -1412,12 +1438,15 @@ private:
|
||||||
/// Handles a write to the QUERY_GET register.
|
/// Handles a write to the QUERY_GET register.
|
||||||
void ProcessQueryGet();
|
void ProcessQueryGet();
|
||||||
|
|
||||||
// Writes the query result accordingly
|
/// Writes the query result accordingly.
|
||||||
void StampQueryResult(u64 payload, bool long_query);
|
void StampQueryResult(u64 payload, bool long_query);
|
||||||
|
|
||||||
// Handles Conditional Rendering
|
/// Handles conditional rendering.
|
||||||
void ProcessQueryCondition();
|
void ProcessQueryCondition();
|
||||||
|
|
||||||
|
/// Handles counter resets.
|
||||||
|
void ProcessCounterReset();
|
||||||
|
|
||||||
/// Handles writes to syncing register.
|
/// Handles writes to syncing register.
|
||||||
void ProcessSyncPoint();
|
void ProcessSyncPoint();
|
||||||
|
|
||||||
|
@ -1434,6 +1463,9 @@ private:
|
||||||
|
|
||||||
// Handles a instance drawcall from MME
|
// Handles a instance drawcall from MME
|
||||||
void StepInstance(MMEDrawMode expected_mode, u32 count);
|
void StepInstance(MMEDrawMode expected_mode, u32 count);
|
||||||
|
|
||||||
|
/// Returns a query's value or an empty object if the value will be deferred through a cache.
|
||||||
|
std::optional<u64> GetQueryResult();
|
||||||
};
|
};
|
||||||
|
|
||||||
#define ASSERT_REG_POSITION(field_name, position) \
|
#define ASSERT_REG_POSITION(field_name, position) \
|
||||||
|
@ -1499,8 +1531,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
|
||||||
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
||||||
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
|
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
|
||||||
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
|
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
|
||||||
|
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
|
||||||
ASSERT_REG_POSITION(point_size, 0x546);
|
ASSERT_REG_POSITION(point_size, 0x546);
|
||||||
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
|
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
|
||||||
|
ASSERT_REG_POSITION(counter_reset, 0x54C);
|
||||||
ASSERT_REG_POSITION(zeta_enable, 0x54E);
|
ASSERT_REG_POSITION(zeta_enable, 0x54E);
|
||||||
ASSERT_REG_POSITION(multisample_control, 0x54F);
|
ASSERT_REG_POSITION(multisample_control, 0x54F);
|
||||||
ASSERT_REG_POSITION(condition, 0x554);
|
ASSERT_REG_POSITION(condition, 0x554);
|
||||||
|
|
|
@ -0,0 +1,359 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <array>
|
||||||
|
#include <cstring>
|
||||||
|
#include <iterator>
|
||||||
|
#include <memory>
|
||||||
|
#include <mutex>
|
||||||
|
#include <optional>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "core/core.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/gpu.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
|
||||||
|
namespace VideoCommon {
|
||||||
|
|
||||||
|
template <class QueryCache, class HostCounter>
|
||||||
|
class CounterStreamBase {
|
||||||
|
public:
|
||||||
|
explicit CounterStreamBase(QueryCache& cache, VideoCore::QueryType type)
|
||||||
|
: cache{cache}, type{type} {}
|
||||||
|
|
||||||
|
/// Updates the state of the stream, enabling or disabling as needed.
|
||||||
|
void Update(bool enabled) {
|
||||||
|
if (enabled) {
|
||||||
|
Enable();
|
||||||
|
} else {
|
||||||
|
Disable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resets the stream to zero. It doesn't disable the query after resetting.
|
||||||
|
void Reset() {
|
||||||
|
if (current) {
|
||||||
|
current->EndQuery();
|
||||||
|
|
||||||
|
// Immediately start a new query to avoid disabling its state.
|
||||||
|
current = cache.Counter(nullptr, type);
|
||||||
|
}
|
||||||
|
last = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the current counter slicing as needed.
|
||||||
|
std::shared_ptr<HostCounter> Current() {
|
||||||
|
if (!current) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
current->EndQuery();
|
||||||
|
last = std::move(current);
|
||||||
|
current = cache.Counter(last, type);
|
||||||
|
return last;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true when the counter stream is enabled.
|
||||||
|
bool IsEnabled() const {
|
||||||
|
return current != nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Enables the stream.
|
||||||
|
void Enable() {
|
||||||
|
if (current) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
current = cache.Counter(last, type);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Disables the stream.
|
||||||
|
void Disable() {
|
||||||
|
if (current) {
|
||||||
|
current->EndQuery();
|
||||||
|
}
|
||||||
|
last = std::exchange(current, nullptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
QueryCache& cache;
|
||||||
|
const VideoCore::QueryType type;
|
||||||
|
|
||||||
|
std::shared_ptr<HostCounter> current;
|
||||||
|
std::shared_ptr<HostCounter> last;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class QueryCache, class CachedQuery, class CounterStream, class HostCounter,
|
||||||
|
class QueryPool>
|
||||||
|
class QueryCacheBase {
|
||||||
|
public:
|
||||||
|
explicit QueryCacheBase(Core::System& system, VideoCore::RasterizerInterface& rasterizer)
|
||||||
|
: system{system}, rasterizer{rasterizer}, streams{{CounterStream{
|
||||||
|
static_cast<QueryCache&>(*this),
|
||||||
|
VideoCore::QueryType::SamplesPassed}}} {}
|
||||||
|
|
||||||
|
void InvalidateRegion(CacheAddr addr, std::size_t size) {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
FlushAndRemoveRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
void FlushRegion(CacheAddr addr, std::size_t size) {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
FlushAndRemoveRegion(addr, size);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Records a query in GPU mapped memory, potentially marked with a timestamp.
|
||||||
|
* @param gpu_addr GPU address to flush to when the mapped memory is read.
|
||||||
|
* @param type Query type, e.g. SamplesPassed.
|
||||||
|
* @param timestamp Timestamp, when empty the flushed query is assumed to be short.
|
||||||
|
*/
|
||||||
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
auto& memory_manager = system.GPU().MemoryManager();
|
||||||
|
const auto host_ptr = memory_manager.GetPointer(gpu_addr);
|
||||||
|
|
||||||
|
CachedQuery* query = TryGet(ToCacheAddr(host_ptr));
|
||||||
|
if (!query) {
|
||||||
|
const auto cpu_addr = memory_manager.GpuToCpuAddress(gpu_addr);
|
||||||
|
ASSERT_OR_EXECUTE(cpu_addr, return;);
|
||||||
|
|
||||||
|
query = Register(type, *cpu_addr, host_ptr, timestamp.has_value());
|
||||||
|
}
|
||||||
|
|
||||||
|
query->BindCounter(Stream(type).Current(), timestamp);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Updates counters from GPU state. Expected to be called once per draw, clear or dispatch.
|
||||||
|
void UpdateCounters() {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
const auto& regs = system.GPU().Maxwell3D().regs;
|
||||||
|
Stream(VideoCore::QueryType::SamplesPassed).Update(regs.samplecnt_enable);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Resets a counter to zero. It doesn't disable the query after resetting.
|
||||||
|
void ResetCounter(VideoCore::QueryType type) {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
Stream(type).Reset();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Disable all active streams. Expected to be called at the end of a command buffer.
|
||||||
|
void DisableStreams() {
|
||||||
|
std::unique_lock lock{mutex};
|
||||||
|
for (auto& stream : streams) {
|
||||||
|
stream.Update(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns a new host counter.
|
||||||
|
std::shared_ptr<HostCounter> Counter(std::shared_ptr<HostCounter> dependency,
|
||||||
|
VideoCore::QueryType type) {
|
||||||
|
return std::make_shared<HostCounter>(static_cast<QueryCache&>(*this), std::move(dependency),
|
||||||
|
type);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the counter stream of the specified type.
|
||||||
|
CounterStream& Stream(VideoCore::QueryType type) {
|
||||||
|
return streams[static_cast<std::size_t>(type)];
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the counter stream of the specified type.
|
||||||
|
const CounterStream& Stream(VideoCore::QueryType type) const {
|
||||||
|
return streams[static_cast<std::size_t>(type)];
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
std::array<QueryPool, VideoCore::NumQueryTypes> query_pools;
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Flushes a memory range to guest memory and removes it from the cache.
|
||||||
|
void FlushAndRemoveRegion(CacheAddr addr, std::size_t size) {
|
||||||
|
const u64 addr_begin = static_cast<u64>(addr);
|
||||||
|
const u64 addr_end = addr_begin + static_cast<u64>(size);
|
||||||
|
const auto in_range = [addr_begin, addr_end](CachedQuery& query) {
|
||||||
|
const u64 cache_begin = query.GetCacheAddr();
|
||||||
|
const u64 cache_end = cache_begin + query.SizeInBytes();
|
||||||
|
return cache_begin < addr_end && addr_begin < cache_end;
|
||||||
|
};
|
||||||
|
|
||||||
|
const u64 page_end = addr_end >> PAGE_SHIFT;
|
||||||
|
for (u64 page = addr_begin >> PAGE_SHIFT; page <= page_end; ++page) {
|
||||||
|
const auto& it = cached_queries.find(page);
|
||||||
|
if (it == std::end(cached_queries)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
auto& contents = it->second;
|
||||||
|
for (auto& query : contents) {
|
||||||
|
if (!in_range(query)) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
rasterizer.UpdatePagesCachedCount(query.CpuAddr(), query.SizeInBytes(), -1);
|
||||||
|
query.Flush();
|
||||||
|
}
|
||||||
|
contents.erase(std::remove_if(std::begin(contents), std::end(contents), in_range),
|
||||||
|
std::end(contents));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Registers the passed parameters as cached and returns a pointer to the stored cached query.
|
||||||
|
CachedQuery* Register(VideoCore::QueryType type, VAddr cpu_addr, u8* host_ptr, bool timestamp) {
|
||||||
|
rasterizer.UpdatePagesCachedCount(cpu_addr, CachedQuery::SizeInBytes(timestamp), 1);
|
||||||
|
const u64 page = static_cast<u64>(ToCacheAddr(host_ptr)) >> PAGE_SHIFT;
|
||||||
|
return &cached_queries[page].emplace_back(static_cast<QueryCache&>(*this), type, cpu_addr,
|
||||||
|
host_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Tries to a get a cached query. Returns nullptr on failure.
|
||||||
|
CachedQuery* TryGet(CacheAddr addr) {
|
||||||
|
const u64 page = static_cast<u64>(addr) >> PAGE_SHIFT;
|
||||||
|
const auto it = cached_queries.find(page);
|
||||||
|
if (it == std::end(cached_queries)) {
|
||||||
|
return nullptr;
|
||||||
|
}
|
||||||
|
auto& contents = it->second;
|
||||||
|
const auto found =
|
||||||
|
std::find_if(std::begin(contents), std::end(contents),
|
||||||
|
[addr](auto& query) { return query.GetCacheAddr() == addr; });
|
||||||
|
return found != std::end(contents) ? &*found : nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr std::uintptr_t PAGE_SIZE = 4096;
|
||||||
|
static constexpr unsigned PAGE_SHIFT = 12;
|
||||||
|
|
||||||
|
Core::System& system;
|
||||||
|
VideoCore::RasterizerInterface& rasterizer;
|
||||||
|
|
||||||
|
std::recursive_mutex mutex;
|
||||||
|
|
||||||
|
std::unordered_map<u64, std::vector<CachedQuery>> cached_queries;
|
||||||
|
|
||||||
|
std::array<CounterStream, VideoCore::NumQueryTypes> streams;
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class QueryCache, class HostCounter>
|
||||||
|
class HostCounterBase {
|
||||||
|
public:
|
||||||
|
explicit HostCounterBase(std::shared_ptr<HostCounter> dependency_)
|
||||||
|
: dependency{std::move(dependency_)}, depth{dependency ? (dependency->Depth() + 1) : 0} {
|
||||||
|
// Avoid nesting too many dependencies to avoid a stack overflow when these are deleted.
|
||||||
|
constexpr u64 depth_threshold = 96;
|
||||||
|
if (depth > depth_threshold) {
|
||||||
|
depth = 0;
|
||||||
|
base_result = dependency->Query();
|
||||||
|
dependency = nullptr;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
virtual ~HostCounterBase() = default;
|
||||||
|
|
||||||
|
/// Returns the current value of the query.
|
||||||
|
u64 Query() {
|
||||||
|
if (result) {
|
||||||
|
return *result;
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 value = BlockingQuery() + base_result;
|
||||||
|
if (dependency) {
|
||||||
|
value += dependency->Query();
|
||||||
|
dependency = nullptr;
|
||||||
|
}
|
||||||
|
|
||||||
|
result = value;
|
||||||
|
return *result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns true when flushing this query will potentially wait.
|
||||||
|
bool WaitPending() const noexcept {
|
||||||
|
return result.has_value();
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 Depth() const noexcept {
|
||||||
|
return depth;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/// Returns the value of query from the backend API blocking as needed.
|
||||||
|
virtual u64 BlockingQuery() const = 0;
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::shared_ptr<HostCounter> dependency; ///< Counter to add to this value.
|
||||||
|
std::optional<u64> result; ///< Filled with the already returned value.
|
||||||
|
u64 depth; ///< Number of nested dependencies.
|
||||||
|
u64 base_result = 0; ///< Equivalent to nested dependencies value.
|
||||||
|
};
|
||||||
|
|
||||||
|
template <class HostCounter>
|
||||||
|
class CachedQueryBase {
|
||||||
|
public:
|
||||||
|
explicit CachedQueryBase(VAddr cpu_addr, u8* host_ptr)
|
||||||
|
: cpu_addr{cpu_addr}, host_ptr{host_ptr} {}
|
||||||
|
virtual ~CachedQueryBase() = default;
|
||||||
|
|
||||||
|
CachedQueryBase(CachedQueryBase&&) noexcept = default;
|
||||||
|
CachedQueryBase(const CachedQueryBase&) = delete;
|
||||||
|
|
||||||
|
CachedQueryBase& operator=(CachedQueryBase&&) noexcept = default;
|
||||||
|
CachedQueryBase& operator=(const CachedQueryBase&) = delete;
|
||||||
|
|
||||||
|
/// Flushes the query to guest memory.
|
||||||
|
virtual void Flush() {
|
||||||
|
// When counter is nullptr it means that it's just been reseted. We are supposed to write a
|
||||||
|
// zero in these cases.
|
||||||
|
const u64 value = counter ? counter->Query() : 0;
|
||||||
|
std::memcpy(host_ptr, &value, sizeof(u64));
|
||||||
|
|
||||||
|
if (timestamp) {
|
||||||
|
std::memcpy(host_ptr + TIMESTAMP_OFFSET, &*timestamp, sizeof(u64));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Binds a counter to this query.
|
||||||
|
void BindCounter(std::shared_ptr<HostCounter> counter_, std::optional<u64> timestamp_) {
|
||||||
|
if (counter) {
|
||||||
|
// If there's an old counter set it means the query is being rewritten by the game.
|
||||||
|
// To avoid losing the data forever, flush here.
|
||||||
|
Flush();
|
||||||
|
}
|
||||||
|
counter = std::move(counter_);
|
||||||
|
timestamp = timestamp_;
|
||||||
|
}
|
||||||
|
|
||||||
|
VAddr CpuAddr() const noexcept {
|
||||||
|
return cpu_addr;
|
||||||
|
}
|
||||||
|
|
||||||
|
CacheAddr GetCacheAddr() const noexcept {
|
||||||
|
return ToCacheAddr(host_ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 SizeInBytes() const noexcept {
|
||||||
|
return SizeInBytes(timestamp.has_value());
|
||||||
|
}
|
||||||
|
|
||||||
|
static constexpr u64 SizeInBytes(bool with_timestamp) noexcept {
|
||||||
|
return with_timestamp ? LARGE_QUERY_SIZE : SMALL_QUERY_SIZE;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected:
|
||||||
|
/// Returns true when querying the counter may potentially block.
|
||||||
|
bool WaitPending() const noexcept {
|
||||||
|
return counter && counter->WaitPending();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr std::size_t SMALL_QUERY_SIZE = 8; // Query size without timestamp.
|
||||||
|
static constexpr std::size_t LARGE_QUERY_SIZE = 16; // Query size with timestamp.
|
||||||
|
static constexpr std::intptr_t TIMESTAMP_OFFSET = 8; // Timestamp offset in a large query.
|
||||||
|
|
||||||
|
VAddr cpu_addr; ///< Guest CPU address.
|
||||||
|
u8* host_ptr; ///< Writable host pointer.
|
||||||
|
std::shared_ptr<HostCounter> counter; ///< Host counter to query, owns the dependency tree.
|
||||||
|
std::optional<u64> timestamp; ///< Timestamp to flush to guest memory.
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace VideoCommon
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <atomic>
|
#include <atomic>
|
||||||
#include <functional>
|
#include <functional>
|
||||||
|
#include <optional>
|
||||||
#include "common/common_types.h"
|
#include "common/common_types.h"
|
||||||
#include "video_core/engines/fermi_2d.h"
|
#include "video_core/engines/fermi_2d.h"
|
||||||
#include "video_core/gpu.h"
|
#include "video_core/gpu.h"
|
||||||
|
@ -17,6 +18,11 @@ class MemoryManager;
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
|
/// Kinds of GPU queries the rasterizer can record. The enumerator value is used as an array index.
enum class QueryType {
    SamplesPassed,
};

/// Number of enumerators in QueryType; sizes the per-type arrays of the query caches.
constexpr std::size_t NumQueryTypes = 1;
|
||||||
|
|
||||||
enum class LoadCallbackStage {
|
enum class LoadCallbackStage {
|
||||||
Prepare,
|
Prepare,
|
||||||
Decompile,
|
Decompile,
|
||||||
|
@ -41,6 +47,12 @@ public:
|
||||||
/// Dispatches a compute shader invocation
|
/// Dispatches a compute shader invocation
|
||||||
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
|
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
|
||||||
|
|
||||||
|
/// Resets the counter of a query
|
||||||
|
virtual void ResetCounter(QueryType type) = 0;
|
||||||
|
|
||||||
|
/// Records a GPU query and caches it
|
||||||
|
virtual void Query(GPUVAddr gpu_addr, QueryType type, std::optional<u64> timestamp) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,120 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstring>
|
||||||
|
#include <memory>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <glad/glad.h>
|
||||||
|
|
||||||
|
#include "common/assert.h"
|
||||||
|
#include "core/core.h"
|
||||||
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
|
#include "video_core/memory_manager.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr std::array<GLenum, VideoCore::NumQueryTypes> QueryTargets = {GL_SAMPLES_PASSED};
|
||||||
|
|
||||||
|
constexpr GLenum GetTarget(VideoCore::QueryType type) {
|
||||||
|
return QueryTargets[static_cast<std::size_t>(type)];
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
QueryCache::QueryCache(Core::System& system, RasterizerOpenGL& gl_rasterizer)
|
||||||
|
: VideoCommon::QueryCacheBase<
|
||||||
|
QueryCache, CachedQuery, CounterStream, HostCounter,
|
||||||
|
std::vector<OGLQuery>>{system,
|
||||||
|
static_cast<VideoCore::RasterizerInterface&>(gl_rasterizer)},
|
||||||
|
gl_rasterizer{gl_rasterizer} {}
|
||||||
|
|
||||||
|
QueryCache::~QueryCache() = default;
|
||||||
|
|
||||||
|
/// Returns a query object for the given type, reusing a pooled one when available.
OGLQuery QueryCache::AllocateQuery(VideoCore::QueryType type) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    OGLQuery query;
    if (!pool.empty()) {
        // Recycle the most recently reserved query.
        query = std::move(pool.back());
        pool.pop_back();
    } else {
        // Pool exhausted: create a new query for this type's target.
        query.Create(GetTarget(type));
    }
    return query;
}
|
||||||
|
|
||||||
|
/// Puts a query object back into the pool of its type so AllocateQuery can hand it out again.
void QueryCache::Reserve(VideoCore::QueryType type, OGLQuery&& query) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    pool.push_back(std::move(query));
}
|
||||||
|
|
||||||
|
bool QueryCache::AnyCommandQueued() const noexcept {
|
||||||
|
return gl_rasterizer.AnyCommandQueued();
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Takes a query object from the cache and immediately begins it on the type's target.
HostCounter::HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type)
    : VideoCommon::HostCounterBase<QueryCache, HostCounter>{std::move(dependency)}, cache{cache},
      type{type}, query{cache.AllocateQuery(type)} {
    const GLenum target = GetTarget(type);
    glBeginQuery(target, query.handle);
}
|
||||||
|
|
||||||
|
/// Returns the query object to the cache's pool instead of destroying it.
HostCounter::~HostCounter() {
    OGLQuery& released = query;
    cache.Reserve(type, std::move(released));
}
|
||||||
|
|
||||||
|
void HostCounter::EndQuery() {
|
||||||
|
if (!cache.AnyCommandQueued()) {
|
||||||
|
// There are chances a query waited on without commands (glDraw, glClear, glDispatch). Not
|
||||||
|
// having any of these causes a lock. glFlush is considered a command, so we can safely wait
|
||||||
|
// for this. Insert to the OpenGL command stream a flush.
|
||||||
|
glFlush();
|
||||||
|
}
|
||||||
|
glEndQuery(GetTarget(type));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads the result of this counter's query with GL_QUERY_RESULT and returns it as an
/// unsigned 64-bit value.
u64 HostCounter::BlockingQuery() const {
    // Start from zero: when glGetQueryObject raises an error it leaves the output untouched,
    // so an uninitialized local would be returned with an indeterminate value.
    GLint64 value = 0;
    glGetQueryObjecti64v(query.handle, GL_QUERY_RESULT, &value);
    return static_cast<u64>(value);
}
|
||||||
|
|
||||||
|
/// Creates a cached query for the given guest address. The owning cache is stored as a pointer
/// (not a reference) so the object stays move-assignable.
CachedQuery::CachedQuery(QueryCache& cache_, VideoCore::QueryType type_, VAddr cpu_addr,
                         u8* host_ptr)
    : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr}, cache{&cache_},
      type{type_} {}
|
||||||
|
|
||||||
|
/// Move constructor: moves the base state, then copies the cache pointer and query type.
/// `rhs.cache` and `rhs.type` are read after `std::move(rhs)`; only the base subobject is
/// handed to the base move constructor, and these two members are a plain pointer and an enum.
CachedQuery::CachedQuery(CachedQuery&& rhs) noexcept
    : VideoCommon::CachedQueryBase<HostCounter>(std::move(rhs)), cache{rhs.cache},
      type{rhs.type} {}

/// Move assignment: same order as the move constructor (base first, then cache and type).
CachedQuery& CachedQuery::operator=(CachedQuery&& rhs) noexcept {
    using Base = VideoCommon::CachedQueryBase<HostCounter>;

    Base::operator=(std::move(rhs));
    cache = rhs.cache;
    type = rhs.type;
    return *this;
}
|
||||||
|
|
||||||
|
void CachedQuery::Flush() {
|
||||||
|
// Waiting for a query while another query of the same target is enabled locks Nvidia's driver.
|
||||||
|
// To avoid this disable and re-enable keeping the dependency stream.
|
||||||
|
// But we only have to do this if we have pending waits to be done.
|
||||||
|
auto& stream = cache->Stream(type);
|
||||||
|
const bool slice_counter = WaitPending() && stream.IsEnabled();
|
||||||
|
if (slice_counter) {
|
||||||
|
stream.Update(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
VideoCommon::CachedQueryBase<HostCounter>::Flush();
|
||||||
|
|
||||||
|
if (slice_counter) {
|
||||||
|
stream.Update(true);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -0,0 +1,78 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <array>
|
||||||
|
#include <memory>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/query_cache.h"
|
||||||
|
#include "video_core/rasterizer_interface.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
|
||||||
|
namespace Core {
|
||||||
|
class System;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
class CachedQuery;
|
||||||
|
class HostCounter;
|
||||||
|
class QueryCache;
|
||||||
|
class RasterizerOpenGL;
|
||||||
|
|
||||||
|
using CounterStream = VideoCommon::CounterStreamBase<QueryCache, HostCounter>;
|
||||||
|
|
||||||
|
/// OpenGL specialization of the generic query cache. It plugs the OpenGL cached query, counter
/// stream and host counter types into VideoCommon::QueryCacheBase and uses a
/// std::vector<OGLQuery> as the per-type pool of reusable query objects.
class QueryCache final : public VideoCommon::QueryCacheBase<QueryCache, CachedQuery, CounterStream,
                                                            HostCounter, std::vector<OGLQuery>> {
public:
    explicit QueryCache(Core::System& system, RasterizerOpenGL& rasterizer);
    ~QueryCache();

    /// Returns a query object for `type`, reusing a pooled one when available.
    OGLQuery AllocateQuery(VideoCore::QueryType type);

    /// Puts `query` back into the pool for `type`.
    void Reserve(VideoCore::QueryType type, OGLQuery&& query);

    /// Reports whether the OpenGL rasterizer has commands queued.
    bool AnyCommandQueued() const noexcept;

private:
    /// Concrete rasterizer, kept to forward AnyCommandQueued.
    RasterizerOpenGL& gl_rasterizer;
};
|
||||||
|
|
||||||
|
/// Host-side counter backed by a single OpenGL query object. The query is taken from the cache
/// and begun on construction, ended with EndQuery, and handed back to the cache on destruction.
class HostCounter final : public VideoCommon::HostCounterBase<QueryCache, HostCounter> {
public:
    explicit HostCounter(QueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type);
    ~HostCounter();

    /// Ends the OpenGL query owned by this counter.
    void EndQuery();

private:
    /// Reads the query result from OpenGL (GL_QUERY_RESULT).
    u64 BlockingQuery() const override;

    QueryCache& cache;                ///< Cache the query object is taken from and returned to.
    const VideoCore::QueryType type;  ///< Guest query type; selects the OpenGL target.
    OGLQuery query;                   ///< OpenGL query object in use by this counter.
};
|
||||||
|
|
||||||
|
/// OpenGL cached query. Move-only; adds the owning cache and the query type on top of
/// VideoCommon::CachedQueryBase so that Flush can reach the counter stream of its type.
class CachedQuery final : public VideoCommon::CachedQueryBase<HostCounter> {
public:
    explicit CachedQuery(QueryCache& cache, VideoCore::QueryType type, VAddr cpu_addr,
                         u8* host_ptr);
    CachedQuery(CachedQuery&& rhs) noexcept;
    CachedQuery(const CachedQuery&) = delete;

    CachedQuery& operator=(CachedQuery&& rhs) noexcept;
    CachedQuery& operator=(const CachedQuery&) = delete;

    /// Flushes the query, pausing the counter stream of the same type around the flush when a
    /// wait is pending.
    void Flush() override;

private:
    QueryCache* cache;          ///< Owning cache; a pointer so move assignment can rebind it.
    VideoCore::QueryType type;  ///< Guest query type of this cached query.
};
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -25,6 +25,7 @@
|
||||||
#include "video_core/engines/maxwell_3d.h"
|
#include "video_core/engines/maxwell_3d.h"
|
||||||
#include "video_core/engines/shader_type.h"
|
#include "video_core/engines/shader_type.h"
|
||||||
#include "video_core/memory_manager.h"
|
#include "video_core/memory_manager.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
#include "video_core/renderer_opengl/gl_rasterizer.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
#include "video_core/renderer_opengl/gl_shader_gen.h"
|
||||||
|
@ -92,8 +93,8 @@ std::size_t GetConstBufferSize(const Tegra::Engines::ConstBufferInfo& buffer,
|
||||||
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
RasterizerOpenGL::RasterizerOpenGL(Core::System& system, Core::Frontend::EmuWindow& emu_window,
|
||||||
ScreenInfo& info)
|
ScreenInfo& info)
|
||||||
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
|
: RasterizerAccelerated{system.Memory()}, texture_cache{system, *this, device},
|
||||||
shader_cache{*this, system, emu_window, device}, system{system}, screen_info{info},
|
shader_cache{*this, system, emu_window, device}, query_cache{system, *this}, system{system},
|
||||||
buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
|
screen_info{info}, buffer_cache{*this, system, device, STREAM_BUFFER_SIZE} {
|
||||||
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
shader_program_manager = std::make_unique<GLShader::ProgramManager>();
|
||||||
state.draw.shader_program = 0;
|
state.draw.shader_program = 0;
|
||||||
state.Apply();
|
state.Apply();
|
||||||
|
@ -541,11 +542,16 @@ void RasterizerOpenGL::Clear() {
|
||||||
} else if (use_stencil) {
|
} else if (use_stencil) {
|
||||||
glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil);
|
glClearBufferiv(GL_STENCIL, 0, ®s.clear_stencil);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
++num_queued_commands;
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
const auto& regs = gpu.regs;
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
SyncRasterizeEnable(state);
|
SyncRasterizeEnable(state);
|
||||||
SyncColorMask();
|
SyncColorMask();
|
||||||
|
@ -638,6 +644,8 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
glTextureBarrier();
|
glTextureBarrier();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
++num_queued_commands;
|
||||||
|
|
||||||
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
|
const GLuint base_instance = static_cast<GLuint>(gpu.regs.vb_base_instance);
|
||||||
const GLsizei num_instances =
|
const GLsizei num_instances =
|
||||||
static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
|
static_cast<GLsizei>(is_instanced ? gpu.mme_draw.instance_count : 1);
|
||||||
|
@ -707,6 +715,16 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
state.ApplyProgramPipeline();
|
state.ApplyProgramPipeline();
|
||||||
|
|
||||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||||
|
++num_queued_commands;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Forwards the counter reset request for the given query type to the query cache.
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}
|
||||||
|
|
||||||
|
void RasterizerOpenGL::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
|
||||||
|
std::optional<u64> timestamp) {
|
||||||
|
query_cache.Query(gpu_addr, type, timestamp);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAll() {}
|
void RasterizerOpenGL::FlushAll() {}
|
||||||
|
@ -718,6 +736,7 @@ void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
}
|
}
|
||||||
texture_cache.FlushRegion(addr, size);
|
texture_cache.FlushRegion(addr, size);
|
||||||
buffer_cache.FlushRegion(addr, size);
|
buffer_cache.FlushRegion(addr, size);
|
||||||
|
query_cache.FlushRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
@ -728,6 +747,7 @@ void RasterizerOpenGL::InvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
texture_cache.InvalidateRegion(addr, size);
|
texture_cache.InvalidateRegion(addr, size);
|
||||||
shader_cache.InvalidateRegion(addr, size);
|
shader_cache.InvalidateRegion(addr, size);
|
||||||
buffer_cache.InvalidateRegion(addr, size);
|
buffer_cache.InvalidateRegion(addr, size);
|
||||||
|
query_cache.InvalidateRegion(addr, size);
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
@ -738,10 +758,18 @@ void RasterizerOpenGL::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushCommands() {
|
void RasterizerOpenGL::FlushCommands() {
|
||||||
|
// Only flush when we have commands queued to OpenGL.
|
||||||
|
if (num_queued_commands == 0) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
num_queued_commands = 0;
|
||||||
glFlush();
|
glFlush();
|
||||||
}
|
}
|
||||||
|
|
||||||
void RasterizerOpenGL::TickFrame() {
|
void RasterizerOpenGL::TickFrame() {
|
||||||
|
// Ticking a frame means that buffers will be swapped, calling glFlush implicitly.
|
||||||
|
num_queued_commands = 0;
|
||||||
|
|
||||||
buffer_cache.TickFrame();
|
buffer_cache.TickFrame();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||||
|
@ -61,6 +62,8 @@ public:
|
||||||
bool DrawMultiBatch(bool is_indexed) override;
|
bool DrawMultiBatch(bool is_indexed) override;
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void DispatchCompute(GPUVAddr code_addr) override;
|
void DispatchCompute(GPUVAddr code_addr) override;
|
||||||
|
void ResetCounter(VideoCore::QueryType type) override;
|
||||||
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
|
@ -75,6 +78,11 @@ public:
|
||||||
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
void LoadDiskResources(const std::atomic_bool& stop_loading,
|
||||||
const VideoCore::DiskResourceLoadCallback& callback) override;
|
const VideoCore::DiskResourceLoadCallback& callback) override;
|
||||||
|
|
||||||
|
/// Returns true when there are commands queued to the OpenGL server.
|
||||||
|
bool AnyCommandQueued() const {
|
||||||
|
return num_queued_commands > 0;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
/// Configures the color and depth framebuffer states.
|
/// Configures the color and depth framebuffer states.
|
||||||
void ConfigureFramebuffers();
|
void ConfigureFramebuffers();
|
||||||
|
@ -180,10 +188,23 @@ private:
|
||||||
/// Syncs the alpha test state to match the guest state
|
/// Syncs the alpha test state to match the guest state
|
||||||
void SyncAlphaTest();
|
void SyncAlphaTest();
|
||||||
|
|
||||||
/// Check for extension that are not strictly required
|
/// Check for extension that are not strictly required but are needed for correct emulation
|
||||||
/// but are needed for correct emulation
|
|
||||||
void CheckExtensions();
|
void CheckExtensions();
|
||||||
|
|
||||||
|
std::size_t CalculateVertexArraysSize() const;
|
||||||
|
|
||||||
|
std::size_t CalculateIndexBufferSize() const;
|
||||||
|
|
||||||
|
/// Updates and returns a vertex array object representing current vertex format
|
||||||
|
GLuint SetupVertexFormat();
|
||||||
|
|
||||||
|
void SetupVertexBuffer(GLuint vao);
|
||||||
|
void SetupVertexInstances(GLuint vao);
|
||||||
|
|
||||||
|
GLintptr SetupIndexBuffer();
|
||||||
|
|
||||||
|
void SetupShaders(GLenum primitive_mode);
|
||||||
|
|
||||||
const Device device;
|
const Device device;
|
||||||
OpenGLState state;
|
OpenGLState state;
|
||||||
|
|
||||||
|
@ -191,6 +212,7 @@ private:
|
||||||
ShaderCacheOpenGL shader_cache;
|
ShaderCacheOpenGL shader_cache;
|
||||||
SamplerCacheOpenGL sampler_cache;
|
SamplerCacheOpenGL sampler_cache;
|
||||||
FramebufferCacheOpenGL framebuffer_cache;
|
FramebufferCacheOpenGL framebuffer_cache;
|
||||||
|
QueryCache query_cache;
|
||||||
|
|
||||||
Core::System& system;
|
Core::System& system;
|
||||||
ScreenInfo& screen_info;
|
ScreenInfo& screen_info;
|
||||||
|
@ -208,19 +230,8 @@ private:
|
||||||
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
|
||||||
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
|
||||||
|
|
||||||
std::size_t CalculateVertexArraysSize() const;
|
/// Number of commands queued to the OpenGL driver. Reset on flush.
|
||||||
|
std::size_t num_queued_commands = 0;
|
||||||
std::size_t CalculateIndexBufferSize() const;
|
|
||||||
|
|
||||||
/// Updates and returns a vertex array object representing current vertex format
|
|
||||||
GLuint SetupVertexFormat();
|
|
||||||
|
|
||||||
void SetupVertexBuffer(GLuint vao);
|
|
||||||
void SetupVertexInstances(GLuint vao);
|
|
||||||
|
|
||||||
GLintptr SetupIndexBuffer();
|
|
||||||
|
|
||||||
void SetupShaders(GLenum primitive_mode);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -207,4 +207,21 @@ void OGLFramebuffer::Release() {
|
||||||
handle = 0;
|
handle = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Creates the OpenGL query object for `target` unless this wrapper already holds a handle.
void OGLQuery::Create(GLenum target) {
    if (handle == 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceCreation);
        glCreateQueries(target, 1, &handle);
    }
}
|
||||||
|
|
||||||
|
/// Deletes the OpenGL query object, if any, and resets the handle to zero.
void OGLQuery::Release() {
    if (handle != 0) {
        MICROPROFILE_SCOPE(OpenGL_ResourceDeletion);
        glDeleteQueries(1, &handle);
        handle = 0;
    }
}
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -266,4 +266,29 @@ public:
|
||||||
GLuint handle = 0;
|
GLuint handle = 0;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
class OGLQuery : private NonCopyable {
|
||||||
|
public:
|
||||||
|
OGLQuery() = default;
|
||||||
|
|
||||||
|
OGLQuery(OGLQuery&& o) noexcept : handle(std::exchange(o.handle, 0)) {}
|
||||||
|
|
||||||
|
~OGLQuery() {
|
||||||
|
Release();
|
||||||
|
}
|
||||||
|
|
||||||
|
OGLQuery& operator=(OGLQuery&& o) noexcept {
|
||||||
|
Release();
|
||||||
|
handle = std::exchange(o.handle, 0);
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Creates a new internal OpenGL resource and stores the handle
|
||||||
|
void Create(GLenum target);
|
||||||
|
|
||||||
|
/// Deletes the internal OpenGL resource
|
||||||
|
void Release();
|
||||||
|
|
||||||
|
GLuint handle = 0;
|
||||||
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
|
@ -104,6 +104,7 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||||
features.depthBiasClamp = true;
|
features.depthBiasClamp = true;
|
||||||
features.geometryShader = true;
|
features.geometryShader = true;
|
||||||
features.tessellationShader = true;
|
features.tessellationShader = true;
|
||||||
|
features.occlusionQueryPrecise = true;
|
||||||
features.fragmentStoresAndAtomics = true;
|
features.fragmentStoresAndAtomics = true;
|
||||||
features.shaderImageGatherExtended = true;
|
features.shaderImageGatherExtended = true;
|
||||||
features.shaderStorageImageWriteWithoutFormat = true;
|
features.shaderStorageImageWriteWithoutFormat = true;
|
||||||
|
@ -117,6 +118,10 @@ bool VKDevice::Create(const vk::DispatchLoaderDynamic& dldi, vk::Instance instan
|
||||||
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
|
bit8_storage.uniformAndStorageBuffer8BitAccess = true;
|
||||||
SetNext(next, bit8_storage);
|
SetNext(next, bit8_storage);
|
||||||
|
|
||||||
|
vk::PhysicalDeviceHostQueryResetFeaturesEXT host_query_reset;
|
||||||
|
host_query_reset.hostQueryReset = true;
|
||||||
|
SetNext(next, host_query_reset);
|
||||||
|
|
||||||
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
vk::PhysicalDeviceFloat16Int8FeaturesKHR float16_int8;
|
||||||
if (is_float16_supported) {
|
if (is_float16_supported) {
|
||||||
float16_int8.shaderFloat16 = true;
|
float16_int8.shaderFloat16 = true;
|
||||||
|
@ -273,6 +278,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||||
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME,
|
||||||
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME,
|
||||||
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME,
|
||||||
|
VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME,
|
||||||
};
|
};
|
||||||
std::bitset<required_extensions.size()> available_extensions{};
|
std::bitset<required_extensions.size()> available_extensions{};
|
||||||
|
|
||||||
|
@ -340,6 +346,7 @@ bool VKDevice::IsSuitable(const vk::DispatchLoaderDynamic& dldi, vk::PhysicalDev
|
||||||
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
std::make_pair(features.depthBiasClamp, "depthBiasClamp"),
|
||||||
std::make_pair(features.geometryShader, "geometryShader"),
|
std::make_pair(features.geometryShader, "geometryShader"),
|
||||||
std::make_pair(features.tessellationShader, "tessellationShader"),
|
std::make_pair(features.tessellationShader, "tessellationShader"),
|
||||||
|
std::make_pair(features.occlusionQueryPrecise, "occlusionQueryPrecise"),
|
||||||
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
std::make_pair(features.fragmentStoresAndAtomics, "fragmentStoresAndAtomics"),
|
||||||
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
std::make_pair(features.shaderImageGatherExtended, "shaderImageGatherExtended"),
|
||||||
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
std::make_pair(features.shaderStorageImageWriteWithoutFormat,
|
||||||
|
@ -376,7 +383,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
extensions.reserve(13);
|
extensions.reserve(14);
|
||||||
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_SWAPCHAIN_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_16BIT_STORAGE_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
extensions.push_back(VK_KHR_8BIT_STORAGE_EXTENSION_NAME);
|
||||||
|
@ -384,6 +391,7 @@ std::vector<const char*> VKDevice::LoadExtensions(const vk::DispatchLoaderDynami
|
||||||
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
extensions.push_back(VK_EXT_VERTEX_ATTRIBUTE_DIVISOR_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
|
extensions.push_back(VK_EXT_SHADER_SUBGROUP_BALLOT_EXTENSION_NAME);
|
||||||
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
|
extensions.push_back(VK_EXT_SHADER_SUBGROUP_VOTE_EXTENSION_NAME);
|
||||||
|
extensions.push_back(VK_EXT_HOST_QUERY_RESET_EXTENSION_NAME);
|
||||||
|
|
||||||
[[maybe_unused]] const bool nsight =
|
[[maybe_unused]] const bool nsight =
|
||||||
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
std::getenv("NVTX_INJECTION64_PATH") || std::getenv("NSIGHT_LAUNCHED");
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
constexpr std::array QUERY_TARGETS = {vk::QueryType::eOcclusion};
|
||||||
|
|
||||||
|
constexpr vk::QueryType GetTarget(VideoCore::QueryType type) {
|
||||||
|
return QUERY_TARGETS[static_cast<std::size_t>(type)];
|
||||||
|
}
|
||||||
|
|
||||||
|
} // Anonymous namespace
|
||||||
|
|
||||||
|
QueryPool::QueryPool() : VKFencedPool{GROW_STEP} {}
|
||||||
|
|
||||||
|
QueryPool::~QueryPool() = default;
|
||||||
|
|
||||||
|
void QueryPool::Initialize(const VKDevice& device_, VideoCore::QueryType type_) {
|
||||||
|
device = &device_;
|
||||||
|
type = type_;
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Commits a query slot that is not marked as in use and returns it as a
/// (Vulkan query pool, index inside that pool) pair.
std::pair<vk::QueryPool, std::uint32_t> QueryPool::Commit(VKFence& fence) {
    // Keep committing resources until one is found whose usage flag is clear.
    std::size_t slot = CommitResource(fence);
    while (usage[slot]) {
        slot = CommitResource(fence);
    }
    usage[slot] = true;

    const std::size_t pool_index = slot / GROW_STEP;
    const auto query_index = static_cast<std::uint32_t>(slot % GROW_STEP);
    return {*pools[pool_index], query_index};
}
|
||||||
|
|
||||||
|
void QueryPool::Allocate(std::size_t begin, std::size_t end) {
|
||||||
|
usage.resize(end);
|
||||||
|
|
||||||
|
const auto dev = device->GetLogical();
|
||||||
|
const u32 size = static_cast<u32>(end - begin);
|
||||||
|
const vk::QueryPoolCreateInfo query_pool_ci({}, GetTarget(type), size, {});
|
||||||
|
pools.push_back(dev.createQueryPoolUnique(query_pool_ci, nullptr, device->GetDispatchLoader()));
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Marks a previously committed query as free again.
/// The owning Vulkan pool is located by handle comparison, and the slot is addressed as
/// `pool position * GROW_STEP + query index`.
void QueryPool::Reserve(std::pair<vk::QueryPool, std::uint32_t> query) {
    std::size_t pool_index = 0;
    while (pool_index < pools.size() && !(query.first == *pools[pool_index])) {
        ++pool_index;
    }
    ASSERT(pool_index != pools.size());

    usage[pool_index * GROW_STEP + static_cast<std::size_t>(query.second)] = false;
}
|
||||||
|
|
||||||
|
/// Builds the Vulkan query cache and initializes one query pool per query type with the device
/// and the type it serves.
VKQueryCache::VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
                           const VKDevice& device_, VKScheduler& scheduler_)
    : VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
                                  QueryPool>{system, rasterizer},
      device{device_}, scheduler{scheduler_} {
    const auto num_types = static_cast<std::size_t>(VideoCore::NumQueryTypes);
    for (std::size_t index = 0; index < num_types; ++index) {
        const auto query_type = static_cast<VideoCore::QueryType>(index);
        query_pools[index].Initialize(device, query_type);
    }
}

VKQueryCache::~VKQueryCache() = default;
|
||||||
|
|
||||||
|
/// Commits a query from the pool of the given type, tied to the scheduler's current fence.
std::pair<vk::QueryPool, std::uint32_t> VKQueryCache::AllocateQuery(VideoCore::QueryType type) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    return pool.Commit(scheduler.GetFence());
}
|
||||||
|
|
||||||
|
/// Hands a query back to the pool of the given type so its slot can be committed again.
void VKQueryCache::Reserve(VideoCore::QueryType type,
                           std::pair<vk::QueryPool, std::uint32_t> query) {
    auto& pool = query_pools[static_cast<std::size_t>(type)];
    pool.Reserve(query);
}
|
||||||
|
|
||||||
|
/// Allocates a query from the cache, remembers the scheduler tick at creation time and records
/// a command that resets the query slot and begins a precise query on it.
HostCounter::HostCounter(VKQueryCache& cache_, std::shared_ptr<HostCounter> dependency_,
                         VideoCore::QueryType type_)
    : VideoCommon::HostCounterBase<VKQueryCache, HostCounter>{std::move(dependency_)},
      cache{cache_}, type{type_}, query{cache_.AllocateQuery(type_)},
      ticks{cache_.Scheduler().Ticks()} {
    const auto logical = cache.Device().GetLogical();
    cache.Scheduler().Record([logical, pool = query.first,
                              index = query.second](vk::CommandBuffer cmdbuf, auto& dld) {
        logical.resetQueryPoolEXT(pool, index, 1, dld);
        cmdbuf.beginQuery(pool, index, vk::QueryControlFlagBits::ePrecise, dld);
    });
}

/// Returns the query slot to the cache.
HostCounter::~HostCounter() {
    cache.Reserve(type, query);
}
|
||||||
|
|
||||||
|
void HostCounter::EndQuery() {
|
||||||
|
cache.Scheduler().Record([query = query](auto cmdbuf, auto& dld) {
|
||||||
|
cmdbuf.endQuery(query.first, query.second, dld);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Reads the 64-bit result of this counter's query, waiting for it to become available.
u64 HostCounter::BlockingQuery() const {
    // If the scheduler has not advanced past the tick captured at construction, flush it
    // before waiting on the result.
    if (ticks >= cache.Scheduler().Ticks()) {
        cache.Scheduler().Flush();
    }

    const auto dev = cache.Device().GetLogical();
    const auto& dld = cache.Device().GetDispatchLoader();

    // Start from zero: the outcome of the call is not checked here, so an uninitialized local
    // would be returned with an indeterminate value if the readback does not write it.
    u64 value = 0;
    dev.getQueryPoolResults(query.first, query.second, 1, sizeof(value), &value, sizeof(value),
                            vk::QueryResultFlagBits::e64 | vk::QueryResultFlagBits::eWait, dld);
    return value;
}
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
|
@ -0,0 +1,104 @@
|
||||||
|
// Copyright 2020 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstddef>
|
||||||
|
#include <cstdint>
|
||||||
|
#include <memory>
|
||||||
|
#include <utility>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/query_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
|
|
||||||
|
namespace VideoCore {
|
||||||
|
class RasterizerInterface;
|
||||||
|
}
|
||||||
|
|
||||||
|
namespace Vulkan {
|
||||||
|
|
||||||
|
class CachedQuery;
|
||||||
|
class HostCounter;
|
||||||
|
class VKDevice;
|
||||||
|
class VKQueryCache;
|
||||||
|
class VKScheduler;
|
||||||
|
|
||||||
|
using CounterStream = VideoCommon::CounterStreamBase<VKQueryCache, HostCounter>;
|
||||||
|
|
||||||
|
class QueryPool final : public VKFencedPool {
|
||||||
|
public:
|
||||||
|
explicit QueryPool();
|
||||||
|
~QueryPool() override;
|
||||||
|
|
||||||
|
void Initialize(const VKDevice& device, VideoCore::QueryType type);
|
||||||
|
|
||||||
|
std::pair<vk::QueryPool, std::uint32_t> Commit(VKFence& fence);
|
||||||
|
|
||||||
|
void Reserve(std::pair<vk::QueryPool, std::uint32_t> query);
|
||||||
|
|
||||||
|
protected:
|
||||||
|
void Allocate(std::size_t begin, std::size_t end) override;
|
||||||
|
|
||||||
|
private:
|
||||||
|
static constexpr std::size_t GROW_STEP = 512;
|
||||||
|
|
||||||
|
const VKDevice* device = nullptr;
|
||||||
|
VideoCore::QueryType type = {};
|
||||||
|
|
||||||
|
std::vector<UniqueQueryPool> pools;
|
||||||
|
std::vector<bool> usage;
|
||||||
|
};
|
||||||
|
|
||||||
|
class VKQueryCache final
|
||||||
|
: public VideoCommon::QueryCacheBase<VKQueryCache, CachedQuery, CounterStream, HostCounter,
|
||||||
|
QueryPool> {
|
||||||
|
public:
|
||||||
|
explicit VKQueryCache(Core::System& system, VideoCore::RasterizerInterface& rasterizer,
|
||||||
|
const VKDevice& device, VKScheduler& scheduler);
|
||||||
|
~VKQueryCache();
|
||||||
|
|
||||||
|
std::pair<vk::QueryPool, std::uint32_t> AllocateQuery(VideoCore::QueryType type);
|
||||||
|
|
||||||
|
void Reserve(VideoCore::QueryType type, std::pair<vk::QueryPool, std::uint32_t> query);
|
||||||
|
|
||||||
|
const VKDevice& Device() const noexcept {
|
||||||
|
return device;
|
||||||
|
}
|
||||||
|
|
||||||
|
VKScheduler& Scheduler() const noexcept {
|
||||||
|
return scheduler;
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
const VKDevice& device;
|
||||||
|
VKScheduler& scheduler;
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Host-side counter backed by one slot of a Vulkan query pool. The query is reset and begun
/// through the scheduler on construction, ended with EndQuery, and its slot is returned to the
/// cache on destruction.
class HostCounter final : public VideoCommon::HostCounterBase<VKQueryCache, HostCounter> {
public:
    explicit HostCounter(VKQueryCache& cache, std::shared_ptr<HostCounter> dependency,
                         VideoCore::QueryType type);
    ~HostCounter();

    /// Records the command that ends this counter's query.
    void EndQuery();

private:
    /// Reads the query result, flushing the scheduler first when it has not advanced past
    /// `ticks`.
    u64 BlockingQuery() const override;

    VKQueryCache& cache;              ///< Cache the query slot is taken from and returned to.
    const VideoCore::QueryType type;  ///< Guest query type of this counter.
    const std::pair<vk::QueryPool, std::uint32_t> query;  ///< (Vulkan pool, index in pool).
    const u64 ticks;                  ///< Scheduler tick count captured at construction.
};
|
||||||
|
|
||||||
|
/// Vulkan cached query. It adds no state to VideoCommon::CachedQueryBase; the cache and query
/// type arguments are accepted but not used.
class CachedQuery : public VideoCommon::CachedQueryBase<HostCounter> {
public:
    explicit CachedQuery(VKQueryCache&, VideoCore::QueryType, VAddr cpu_addr, u8* host_ptr)
        : VideoCommon::CachedQueryBase<HostCounter>{cpu_addr, host_ptr} {}
};
|
||||||
|
|
||||||
|
} // namespace Vulkan
|
|
@ -289,7 +289,9 @@ RasterizerVulkan::RasterizerVulkan(Core::System& system, Core::Frontend::EmuWind
|
||||||
staging_pool),
|
staging_pool),
|
||||||
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
|
pipeline_cache(system, *this, device, scheduler, descriptor_pool, update_descriptor_queue),
|
||||||
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
|
buffer_cache(*this, system, device, memory_manager, scheduler, staging_pool),
|
||||||
sampler_cache(device) {}
|
sampler_cache(device), query_cache(system, *this, device, scheduler) {
|
||||||
|
scheduler.SetQueryCache(query_cache);
|
||||||
|
}
|
||||||
|
|
||||||
RasterizerVulkan::~RasterizerVulkan() = default;
|
RasterizerVulkan::~RasterizerVulkan() = default;
|
||||||
|
|
||||||
|
@ -308,6 +310,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
|
|
||||||
FlushWork();
|
FlushWork();
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
|
GraphicsPipelineCacheKey key{GetFixedPipelineState(gpu.regs)};
|
||||||
|
|
||||||
|
@ -362,6 +366,8 @@ void RasterizerVulkan::Draw(bool is_indexed, bool is_instanced) {
|
||||||
void RasterizerVulkan::Clear() {
|
void RasterizerVulkan::Clear() {
|
||||||
MICROPROFILE_SCOPE(Vulkan_Clearing);
|
MICROPROFILE_SCOPE(Vulkan_Clearing);
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& gpu = system.GPU().Maxwell3D();
|
const auto& gpu = system.GPU().Maxwell3D();
|
||||||
if (!system.GPU().Maxwell3D().ShouldExecute()) {
|
if (!system.GPU().Maxwell3D().ShouldExecute()) {
|
||||||
return;
|
return;
|
||||||
|
@ -429,6 +435,8 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
sampled_views.clear();
|
sampled_views.clear();
|
||||||
image_views.clear();
|
image_views.clear();
|
||||||
|
|
||||||
|
query_cache.UpdateCounters();
|
||||||
|
|
||||||
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
|
||||||
const ComputePipelineCacheKey key{
|
const ComputePipelineCacheKey key{
|
||||||
code_addr,
|
code_addr,
|
||||||
|
@ -471,17 +479,28 @@ void RasterizerVulkan::DispatchCompute(GPUVAddr code_addr) {
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Forwards a counter reset for the given query type to the query cache.
void RasterizerVulkan::ResetCounter(VideoCore::QueryType type) {
    query_cache.ResetCounter(type);
}
|
||||||
|
|
||||||
|
/// Forwards a query request to the query cache.
/// @param gpu_addr  GPU virtual address passed through unchanged.
/// @param type      Kind of query being requested.
/// @param timestamp Optional 64-bit timestamp passed through unchanged.
void RasterizerVulkan::Query(GPUVAddr gpu_addr, VideoCore::QueryType type,
                             std::optional<u64> timestamp) {
    query_cache.Query(gpu_addr, type, timestamp);
}
|
||||||
|
|
||||||
/// Has an empty body: calling it performs no work.
void RasterizerVulkan::FlushAll() {
}
|
||||||
|
|
||||||
/// Forwards the flush request for the given address and size to the texture, buffer and
/// query caches, in that order.
void RasterizerVulkan::FlushRegion(CacheAddr addr, u64 size) {
    texture_cache.FlushRegion(addr, size);
    buffer_cache.FlushRegion(addr, size);
    query_cache.FlushRegion(addr, size);
}
|
||||||
|
|
||||||
/// Forwards the invalidation request for the given address and size to the texture,
/// pipeline, buffer and query caches, in that order.
void RasterizerVulkan::InvalidateRegion(CacheAddr addr, u64 size) {
    texture_cache.InvalidateRegion(addr, size);
    pipeline_cache.InvalidateRegion(addr, size);
    buffer_cache.InvalidateRegion(addr, size);
    query_cache.InvalidateRegion(addr, size);
}
|
||||||
|
|
||||||
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
void RasterizerVulkan::FlushAndInvalidateRegion(CacheAddr addr, u64 size) {
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
#include "video_core/renderer_vulkan/vk_descriptor_pool.h"
|
||||||
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
#include "video_core/renderer_vulkan/vk_memory_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
#include "video_core/renderer_vulkan/vk_pipeline_cache.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
#include "video_core/renderer_vulkan/vk_renderpass_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
|
#include "video_core/renderer_vulkan/vk_sampler_cache.h"
|
||||||
|
@ -96,7 +97,7 @@ struct ImageView {
|
||||||
vk::ImageLayout* layout = nullptr;
|
vk::ImageLayout* layout = nullptr;
|
||||||
};
|
};
|
||||||
|
|
||||||
class RasterizerVulkan : public VideoCore::RasterizerAccelerated {
|
class RasterizerVulkan final : public VideoCore::RasterizerAccelerated {
|
||||||
public:
|
public:
|
||||||
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
|
explicit RasterizerVulkan(Core::System& system, Core::Frontend::EmuWindow& render_window,
|
||||||
VKScreenInfo& screen_info, const VKDevice& device,
|
VKScreenInfo& screen_info, const VKDevice& device,
|
||||||
|
@ -108,6 +109,8 @@ public:
|
||||||
bool DrawMultiBatch(bool is_indexed) override;
|
bool DrawMultiBatch(bool is_indexed) override;
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void DispatchCompute(GPUVAddr code_addr) override;
|
void DispatchCompute(GPUVAddr code_addr) override;
|
||||||
|
void ResetCounter(VideoCore::QueryType type) override;
|
||||||
|
void Query(GPUVAddr gpu_addr, VideoCore::QueryType type, std::optional<u64> timestamp) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
|
@ -247,6 +250,7 @@ private:
|
||||||
VKPipelineCache pipeline_cache;
|
VKPipelineCache pipeline_cache;
|
||||||
VKBufferCache buffer_cache;
|
VKBufferCache buffer_cache;
|
||||||
VKSamplerCache sampler_cache;
|
VKSamplerCache sampler_cache;
|
||||||
|
VKQueryCache query_cache;
|
||||||
|
|
||||||
std::array<View, Maxwell::NumRenderTargets> color_attachments;
|
std::array<View, Maxwell::NumRenderTargets> color_attachments;
|
||||||
View zeta_attachment;
|
View zeta_attachment;
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "common/microprofile.h"
|
#include "common/microprofile.h"
|
||||||
#include "video_core/renderer_vulkan/declarations.h"
|
#include "video_core/renderer_vulkan/declarations.h"
|
||||||
#include "video_core/renderer_vulkan/vk_device.h"
|
#include "video_core/renderer_vulkan/vk_device.h"
|
||||||
|
#include "video_core/renderer_vulkan/vk_query_cache.h"
|
||||||
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
#include "video_core/renderer_vulkan/vk_resource_manager.h"
|
||||||
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
#include "video_core/renderer_vulkan/vk_scheduler.h"
|
||||||
|
|
||||||
|
@ -139,6 +140,8 @@ void VKScheduler::SubmitExecution(vk::Semaphore semaphore) {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKScheduler::AllocateNewContext() {
|
void VKScheduler::AllocateNewContext() {
|
||||||
|
++ticks;
|
||||||
|
|
||||||
std::unique_lock lock{mutex};
|
std::unique_lock lock{mutex};
|
||||||
current_fence = next_fence;
|
current_fence = next_fence;
|
||||||
next_fence = &resource_manager.CommitFence();
|
next_fence = &resource_manager.CommitFence();
|
||||||
|
@ -146,6 +149,10 @@ void VKScheduler::AllocateNewContext() {
|
||||||
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
|
current_cmdbuf = resource_manager.CommitCommandBuffer(*current_fence);
|
||||||
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
|
current_cmdbuf.begin({vk::CommandBufferUsageFlagBits::eOneTimeSubmit},
|
||||||
device.GetDispatchLoader());
|
device.GetDispatchLoader());
|
||||||
|
// Enable counters once again. These are disabled when a command buffer is finished.
|
||||||
|
if (query_cache) {
|
||||||
|
query_cache->UpdateCounters();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKScheduler::InvalidateState() {
|
void VKScheduler::InvalidateState() {
|
||||||
|
@ -159,6 +166,7 @@ void VKScheduler::InvalidateState() {
|
||||||
}
|
}
|
||||||
|
|
||||||
void VKScheduler::EndPendingOperations() {
|
void VKScheduler::EndPendingOperations() {
|
||||||
|
query_cache->DisableStreams();
|
||||||
EndRenderPass();
|
EndRenderPass();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
#include <atomic>
|
||||||
#include <condition_variable>
|
#include <condition_variable>
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <optional>
|
#include <optional>
|
||||||
|
@ -18,6 +19,7 @@ namespace Vulkan {
|
||||||
|
|
||||||
class VKDevice;
|
class VKDevice;
|
||||||
class VKFence;
|
class VKFence;
|
||||||
|
class VKQueryCache;
|
||||||
class VKResourceManager;
|
class VKResourceManager;
|
||||||
|
|
||||||
class VKFenceView {
|
class VKFenceView {
|
||||||
|
@ -67,6 +69,11 @@ public:
|
||||||
/// Binds a pipeline to the current execution context.
|
/// Binds a pipeline to the current execution context.
|
||||||
void BindGraphicsPipeline(vk::Pipeline pipeline);
|
void BindGraphicsPipeline(vk::Pipeline pipeline);
|
||||||
|
|
||||||
|
/// Assigns the query cache.
|
||||||
|
void SetQueryCache(VKQueryCache& query_cache_) {
|
||||||
|
query_cache = &query_cache_;
|
||||||
|
}
|
||||||
|
|
||||||
/// Returns true when viewports have been set in the current command buffer.
|
/// Returns true when viewports have been set in the current command buffer.
|
||||||
bool TouchViewports() {
|
bool TouchViewports() {
|
||||||
return std::exchange(state.viewports, true);
|
return std::exchange(state.viewports, true);
|
||||||
|
@ -112,6 +119,11 @@ public:
|
||||||
return current_fence;
|
return current_fence;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns the current command buffer tick.
|
||||||
|
u64 Ticks() const {
|
||||||
|
return ticks;
|
||||||
|
}
|
||||||
|
|
||||||
private:
|
private:
|
||||||
class Command {
|
class Command {
|
||||||
public:
|
public:
|
||||||
|
@ -205,6 +217,8 @@ private:
|
||||||
|
|
||||||
const VKDevice& device;
|
const VKDevice& device;
|
||||||
VKResourceManager& resource_manager;
|
VKResourceManager& resource_manager;
|
||||||
|
VKQueryCache* query_cache = nullptr;
|
||||||
|
|
||||||
vk::CommandBuffer current_cmdbuf;
|
vk::CommandBuffer current_cmdbuf;
|
||||||
VKFence* current_fence = nullptr;
|
VKFence* current_fence = nullptr;
|
||||||
VKFence* next_fence = nullptr;
|
VKFence* next_fence = nullptr;
|
||||||
|
@ -227,6 +241,7 @@ private:
|
||||||
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
|
Common::SPSCQueue<std::unique_ptr<CommandChunk>> chunk_reserve;
|
||||||
std::mutex mutex;
|
std::mutex mutex;
|
||||||
std::condition_variable cv;
|
std::condition_variable cv;
|
||||||
|
std::atomic<u64> ticks = 0;
|
||||||
bool quit = false;
|
bool quit = false;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue