forked from ShuriZma/suyu
maxwell_3d: Slow implementation of passed samples (query 21)
Implements GL_SAMPLES_PASSED by waiting immediately for queries.
This commit is contained in:
parent
3217400dd1
commit
2b58652f08
|
@ -74,6 +74,8 @@ add_library(video_core STATIC
|
||||||
renderer_opengl/gl_stream_buffer.h
|
renderer_opengl/gl_stream_buffer.h
|
||||||
renderer_opengl/gl_texture_cache.cpp
|
renderer_opengl/gl_texture_cache.cpp
|
||||||
renderer_opengl/gl_texture_cache.h
|
renderer_opengl/gl_texture_cache.h
|
||||||
|
renderer_opengl/gl_query_cache.cpp
|
||||||
|
renderer_opengl/gl_query_cache.h
|
||||||
renderer_opengl/maxwell_to_gl.h
|
renderer_opengl/maxwell_to_gl.h
|
||||||
renderer_opengl/renderer_opengl.cpp
|
renderer_opengl/renderer_opengl.cpp
|
||||||
renderer_opengl/renderer_opengl.h
|
renderer_opengl/renderer_opengl.h
|
||||||
|
|
|
@ -400,6 +400,10 @@ void Maxwell3D::CallMethod(const GPU::MethodCall& method_call) {
|
||||||
ProcessQueryCondition();
|
ProcessQueryCondition();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
case MAXWELL3D_REG_INDEX(counter_reset): {
|
||||||
|
ProcessCounterReset();
|
||||||
|
break;
|
||||||
|
}
|
||||||
case MAXWELL3D_REG_INDEX(sync_info): {
|
case MAXWELL3D_REG_INDEX(sync_info): {
|
||||||
ProcessSyncPoint();
|
ProcessSyncPoint();
|
||||||
break;
|
break;
|
||||||
|
@ -544,23 +548,23 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
"Units other than CROP are unimplemented");
|
"Units other than CROP are unimplemented");
|
||||||
|
|
||||||
switch (regs.query.query_get.operation) {
|
switch (regs.query.query_get.operation) {
|
||||||
case Regs::QueryOperation::Release: {
|
case Regs::QueryOperation::Release:
|
||||||
const u64 result = regs.query.query_sequence;
|
StampQueryResult(regs.query.query_sequence, regs.query.query_get.short_query == 0);
|
||||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
|
||||||
break;
|
break;
|
||||||
}
|
case Regs::QueryOperation::Acquire:
|
||||||
case Regs::QueryOperation::Acquire: {
|
// TODO(Blinkhawk): Under this operation, the GPU waits for the CPU to write a value that
|
||||||
// Todo(Blinkhawk): Under this operation, the GPU waits for the CPU
|
// matches the current payload.
|
||||||
// to write a value that matches the current payload.
|
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
UNIMPLEMENTED_MSG("Unimplemented query operation ACQUIRE");
|
||||||
break;
|
break;
|
||||||
}
|
|
||||||
case Regs::QueryOperation::Counter: {
|
case Regs::QueryOperation::Counter: {
|
||||||
u64 result{};
|
u64 result;
|
||||||
switch (regs.query.query_get.select) {
|
switch (regs.query.query_get.select) {
|
||||||
case Regs::QuerySelect::Zero:
|
case Regs::QuerySelect::Zero:
|
||||||
result = 0;
|
result = 0;
|
||||||
break;
|
break;
|
||||||
|
case Regs::QuerySelect::SamplesPassed:
|
||||||
|
result = rasterizer.Query(VideoCore::QueryType::SamplesPassed);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
result = 1;
|
result = 1;
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
UNIMPLEMENTED_MSG("Unimplemented query select type {}",
|
||||||
|
@ -569,15 +573,13 @@ void Maxwell3D::ProcessQueryGet() {
|
||||||
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
StampQueryResult(result, regs.query.query_get.short_query == 0);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case Regs::QueryOperation::Trap: {
|
case Regs::QueryOperation::Trap:
|
||||||
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
UNIMPLEMENTED_MSG("Unimplemented query operation TRAP");
|
||||||
break;
|
break;
|
||||||
}
|
default:
|
||||||
default: {
|
|
||||||
UNIMPLEMENTED_MSG("Unknown query operation");
|
UNIMPLEMENTED_MSG("Unknown query operation");
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessQueryCondition() {
|
void Maxwell3D::ProcessQueryCondition() {
|
||||||
|
@ -619,6 +621,17 @@ void Maxwell3D::ProcessQueryCondition() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void Maxwell3D::ProcessCounterReset() {
|
||||||
|
switch (regs.counter_reset) {
|
||||||
|
case Regs::CounterReset::SampleCnt:
|
||||||
|
rasterizer.ResetCounter(VideoCore::QueryType::SamplesPassed);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
UNIMPLEMENTED_MSG("counter_reset={}", static_cast<u32>(regs.counter_reset));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
void Maxwell3D::ProcessSyncPoint() {
|
void Maxwell3D::ProcessSyncPoint() {
|
||||||
const u32 sync_point = regs.sync_info.sync_point.Value();
|
const u32 sync_point = regs.sync_info.sync_point.Value();
|
||||||
const u32 increment = regs.sync_info.increment.Value();
|
const u32 increment = regs.sync_info.increment.Value();
|
||||||
|
|
|
@ -409,6 +409,27 @@ public:
|
||||||
Linear = 1,
|
Linear = 1,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Values of the counter_reset register; each one names the counter a write asks to reset.
/// Maxwell3D::ProcessCounterReset currently handles only SampleCnt and reports every other
/// value as unimplemented. The UnkXX entries are named after their raw value.
enum class CounterReset : u32 {
|
||||||
|
SampleCnt = 0x01,
|
||||||
|
Unk02 = 0x02,
|
||||||
|
Unk03 = 0x03,
|
||||||
|
Unk04 = 0x04,
|
||||||
|
EmittedPrimitives = 0x10, // Not tested
|
||||||
|
Unk11 = 0x11,
|
||||||
|
Unk12 = 0x12,
|
||||||
|
Unk13 = 0x13,
|
||||||
|
Unk15 = 0x15,
|
||||||
|
Unk16 = 0x16,
|
||||||
|
Unk17 = 0x17,
|
||||||
|
Unk18 = 0x18,
|
||||||
|
Unk1A = 0x1A,
|
||||||
|
Unk1B = 0x1B,
|
||||||
|
Unk1C = 0x1C,
|
||||||
|
Unk1D = 0x1D,
|
||||||
|
Unk1E = 0x1E,
|
||||||
|
GeneratedPrimitives = 0x1F,
|
||||||
|
};
|
||||||
|
|
||||||
struct Cull {
|
struct Cull {
|
||||||
enum class FrontFace : u32 {
|
enum class FrontFace : u32 {
|
||||||
ClockWise = 0x0900,
|
ClockWise = 0x0900,
|
||||||
|
@ -857,7 +878,7 @@ public:
|
||||||
BitField<7, 1, u32> c7;
|
BitField<7, 1, u32> c7;
|
||||||
} clip_distance_enabled;
|
} clip_distance_enabled;
|
||||||
|
|
||||||
INSERT_UNION_PADDING_WORDS(0x1);
|
u32 samplecnt_enable;
|
||||||
|
|
||||||
float point_size;
|
float point_size;
|
||||||
|
|
||||||
|
@ -865,7 +886,11 @@ public:
|
||||||
|
|
||||||
u32 point_sprite_enable;
|
u32 point_sprite_enable;
|
||||||
|
|
||||||
INSERT_UNION_PADDING_WORDS(0x5);
|
INSERT_UNION_PADDING_WORDS(0x3);
|
||||||
|
|
||||||
|
CounterReset counter_reset;
|
||||||
|
|
||||||
|
INSERT_UNION_PADDING_WORDS(0x1);
|
||||||
|
|
||||||
u32 zeta_enable;
|
u32 zeta_enable;
|
||||||
|
|
||||||
|
@ -1412,12 +1437,15 @@ private:
|
||||||
/// Handles a write to the QUERY_GET register.
|
/// Handles a write to the QUERY_GET register.
|
||||||
void ProcessQueryGet();
|
void ProcessQueryGet();
|
||||||
|
|
||||||
// Writes the query result accordingly
|
/// Writes the query result accordingly.
|
||||||
void StampQueryResult(u64 payload, bool long_query);
|
void StampQueryResult(u64 payload, bool long_query);
|
||||||
|
|
||||||
// Handles Conditional Rendering
|
/// Handles conditional rendering.
|
||||||
void ProcessQueryCondition();
|
void ProcessQueryCondition();
|
||||||
|
|
||||||
|
/// Handles counter resets.
|
||||||
|
void ProcessCounterReset();
|
||||||
|
|
||||||
/// Handles writes to syncing register.
|
/// Handles writes to syncing register.
|
||||||
void ProcessSyncPoint();
|
void ProcessSyncPoint();
|
||||||
|
|
||||||
|
@ -1499,8 +1527,10 @@ ASSERT_REG_POSITION(screen_y_control, 0x4EB);
|
||||||
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
ASSERT_REG_POSITION(vb_element_base, 0x50D);
|
||||||
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
|
ASSERT_REG_POSITION(vb_base_instance, 0x50E);
|
||||||
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
|
ASSERT_REG_POSITION(clip_distance_enabled, 0x544);
|
||||||
|
ASSERT_REG_POSITION(samplecnt_enable, 0x545);
|
||||||
ASSERT_REG_POSITION(point_size, 0x546);
|
ASSERT_REG_POSITION(point_size, 0x546);
|
||||||
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
|
ASSERT_REG_POSITION(point_sprite_enable, 0x548);
|
||||||
|
ASSERT_REG_POSITION(counter_reset, 0x54C);
|
||||||
ASSERT_REG_POSITION(zeta_enable, 0x54E);
|
ASSERT_REG_POSITION(zeta_enable, 0x54E);
|
||||||
ASSERT_REG_POSITION(multisample_control, 0x54F);
|
ASSERT_REG_POSITION(multisample_control, 0x54F);
|
||||||
ASSERT_REG_POSITION(condition, 0x554);
|
ASSERT_REG_POSITION(condition, 0x554);
|
||||||
|
|
|
@ -17,6 +17,10 @@ class MemoryManager;
|
||||||
|
|
||||||
namespace VideoCore {
|
namespace VideoCore {
|
||||||
|
|
||||||
|
/// Kinds of GPU counter accepted by the rasterizer's ResetCounter() and Query() calls.
enum class QueryType {
|
||||||
|
// Selected by Maxwell3D for QuerySelect::SamplesPassed and CounterReset::SampleCnt.
SamplesPassed,
|
||||||
|
};
|
||||||
|
|
||||||
enum class LoadCallbackStage {
|
enum class LoadCallbackStage {
|
||||||
Prepare,
|
Prepare,
|
||||||
Decompile,
|
Decompile,
|
||||||
|
@ -41,6 +45,12 @@ public:
|
||||||
/// Dispatches a compute shader invocation
|
/// Dispatches a compute shader invocation
|
||||||
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
|
virtual void DispatchCompute(GPUVAddr code_addr) = 0;
|
||||||
|
|
||||||
|
/// Resets the counter of a query
|
||||||
|
virtual void ResetCounter(QueryType type) = 0;
|
||||||
|
|
||||||
|
/// Returns the value of a GPU query
|
||||||
|
virtual u64 Query(QueryType type) = 0;
|
||||||
|
|
||||||
/// Notify rasterizer that all caches should be flushed to Switch memory
|
/// Notify rasterizer that all caches should be flushed to Switch memory
|
||||||
virtual void FlushAll() = 0;
|
virtual void FlushAll() = 0;
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,59 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#include <glad/glad.h>
|
||||||
|
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
HostCounter::HostCounter(GLenum target) {
|
||||||
|
query.Create(target);
|
||||||
|
}
|
||||||
|
|
||||||
|
HostCounter::~HostCounter() = default;
|
||||||
|
|
||||||
|
void HostCounter::UpdateState(bool enabled) {
|
||||||
|
if (enabled) {
|
||||||
|
Enable();
|
||||||
|
} else {
|
||||||
|
Disable();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostCounter::Reset() {
|
||||||
|
counter = 0;
|
||||||
|
Disable();
|
||||||
|
}
|
||||||
|
|
||||||
|
u64 HostCounter::Query() {
|
||||||
|
if (!is_beginned) {
|
||||||
|
return counter;
|
||||||
|
}
|
||||||
|
Disable();
|
||||||
|
u64 value;
|
||||||
|
glGetQueryObjectui64v(query.handle, GL_QUERY_RESULT, &value);
|
||||||
|
Enable();
|
||||||
|
|
||||||
|
counter += value;
|
||||||
|
return counter;
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostCounter::Enable() {
|
||||||
|
if (is_beginned) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
is_beginned = true;
|
||||||
|
glBeginQuery(GL_SAMPLES_PASSED, query.handle);
|
||||||
|
}
|
||||||
|
|
||||||
|
void HostCounter::Disable() {
|
||||||
|
if (!is_beginned) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
glEndQuery(GL_SAMPLES_PASSED);
|
||||||
|
is_beginned = false;
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -0,0 +1,41 @@
|
||||||
|
// Copyright 2019 yuzu Emulator Project
|
||||||
|
// Licensed under GPLv2 or any later version
|
||||||
|
// Refer to the license.txt file included.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <glad/glad.h>
|
||||||
|
|
||||||
|
#include "common/common_types.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
|
|
||||||
|
namespace OpenGL {
|
||||||
|
|
||||||
|
/// Accumulating counter backed by a single OpenGL query object.
/// The total is kept in `counter`; `is_beginned` tracks whether the query is currently active.
class HostCounter final {
|
||||||
|
public:
|
||||||
|
/// Creates the underlying OpenGL query object.
/// @param target OpenGL query target used to create the query.
explicit HostCounter(GLenum target);
|
||||||
|
~HostCounter();
|
||||||
|
|
||||||
|
/// Enables the counter when `enabled` is true and disables it otherwise.
|
||||||
|
void UpdateState(bool enabled);
|
||||||
|
|
||||||
|
/// Sets the accumulated value back to zero and leaves the counter disabled.
|
||||||
|
void Reset();
|
||||||
|
|
||||||
|
/// Returns the accumulated value of the counter. When the query is active it is ended, its
/// result is read back with GL_QUERY_RESULT and added to the total, and it is started again.
|
||||||
|
/// @note It may harm precision of future queries if the counter is not disabled.
|
||||||
|
u64 Query();
|
||||||
|
|
||||||
|
private:
|
||||||
|
/// Begins the OpenGL query; does nothing if it is already active.
|
||||||
|
void Enable();
|
||||||
|
|
||||||
|
/// Ends the OpenGL query; does nothing if it is not active.
|
||||||
|
void Disable();
|
||||||
|
|
||||||
|
OGLQuery query; ///< OpenGL query object.
|
||||||
|
u64 counter{}; ///< Sum of the query results collected so far.
|
||||||
|
bool is_beginned{}; ///< True while the OpenGL query is active (begun and not yet ended).
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace OpenGL
|
|
@ -547,6 +547,9 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
|
||||||
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
MICROPROFILE_SCOPE(OpenGL_Drawing);
|
||||||
auto& gpu = system.GPU().Maxwell3D();
|
auto& gpu = system.GPU().Maxwell3D();
|
||||||
|
|
||||||
|
const auto& regs = gpu.regs;
|
||||||
|
samples_passed.UpdateState(regs.samplecnt_enable);
|
||||||
|
|
||||||
SyncRasterizeEnable(state);
|
SyncRasterizeEnable(state);
|
||||||
SyncColorMask();
|
SyncColorMask();
|
||||||
SyncFragmentColorClampState();
|
SyncFragmentColorClampState();
|
||||||
|
@ -709,6 +712,27 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
|
||||||
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Resets the host counter backing the given query type.
/// Only SamplesPassed is supported; other types are reported as unimplemented.
void RasterizerOpenGL::ResetCounter(VideoCore::QueryType type) {
    if (type == VideoCore::QueryType::SamplesPassed) {
        samples_passed.Reset();
        return;
    }
    UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
}
|
||||||
|
|
||||||
|
/// Returns the current value of the host counter backing the given query type.
/// Unsupported types are reported as unimplemented and yield 1.
u64 RasterizerOpenGL::Query(VideoCore::QueryType type) {
    if (type != VideoCore::QueryType::SamplesPassed) {
        UNIMPLEMENTED_MSG("type={}", static_cast<u32>(type));
        return 1;
    }
    return samples_passed.Query();
}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushAll() {}
|
void RasterizerOpenGL::FlushAll() {}
|
||||||
|
|
||||||
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
void RasterizerOpenGL::FlushRegion(CacheAddr addr, u64 size) {
|
||||||
|
|
|
@ -24,6 +24,7 @@
|
||||||
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
#include "video_core/renderer_opengl/gl_buffer_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_device.h"
|
#include "video_core/renderer_opengl/gl_device.h"
|
||||||
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
#include "video_core/renderer_opengl/gl_framebuffer_cache.h"
|
||||||
|
#include "video_core/renderer_opengl/gl_query_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
#include "video_core/renderer_opengl/gl_resource_manager.h"
|
||||||
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
#include "video_core/renderer_opengl/gl_sampler_cache.h"
|
||||||
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
#include "video_core/renderer_opengl/gl_shader_cache.h"
|
||||||
|
@ -61,6 +62,8 @@ public:
|
||||||
bool DrawMultiBatch(bool is_indexed) override;
|
bool DrawMultiBatch(bool is_indexed) override;
|
||||||
void Clear() override;
|
void Clear() override;
|
||||||
void DispatchCompute(GPUVAddr code_addr) override;
|
void DispatchCompute(GPUVAddr code_addr) override;
|
||||||
|
void ResetCounter(VideoCore::QueryType type) override;
|
||||||
|
u64 Query(VideoCore::QueryType type) override;
|
||||||
void FlushAll() override;
|
void FlushAll() override;
|
||||||
void FlushRegion(CacheAddr addr, u64 size) override;
|
void FlushRegion(CacheAddr addr, u64 size) override;
|
||||||
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
void InvalidateRegion(CacheAddr addr, u64 size) override;
|
||||||
|
@ -221,6 +224,8 @@ private:
|
||||||
GLintptr SetupIndexBuffer();
|
GLintptr SetupIndexBuffer();
|
||||||
|
|
||||||
void SetupShaders(GLenum primitive_mode);
|
void SetupShaders(GLenum primitive_mode);
|
||||||
|
|
||||||
|
HostCounter samples_passed{GL_SAMPLES_PASSED};
|
||||||
};
|
};
|
||||||
|
|
||||||
} // namespace OpenGL
|
} // namespace OpenGL
|
||||||
|
|
Loading…
Reference in New Issue