GPUDevice: Use row-major matrix packing
With column vectors, mul() turns into dot products instead of madds.
This commit is contained in:
parent
e22d67f4aa
commit
acf04ed67a
|
@ -5,4 +5,4 @@
|
||||||
|
|
||||||
#include "common/types.h"
|
#include "common/types.h"
|
||||||
|
|
||||||
static constexpr u32 SHADER_CACHE_VERSION = 22;
|
static constexpr u32 SHADER_CACHE_VERSION = 23;
|
||||||
|
|
|
@ -475,8 +475,9 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShaderWithFXC(u32 shader_m
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
|
||||||
static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3;
|
static constexpr UINT flags_non_debug = D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_OPTIMIZATION_LEVEL3;
|
||||||
static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG;
|
static constexpr UINT flags_debug =
|
||||||
|
D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG;
|
||||||
|
|
||||||
Microsoft::WRL::ComPtr<ID3DBlob> blob;
|
Microsoft::WRL::ComPtr<ID3DBlob> blob;
|
||||||
Microsoft::WRL::ComPtr<ID3DBlob> error_blob;
|
Microsoft::WRL::ComPtr<ID3DBlob> error_blob;
|
||||||
|
@ -556,12 +557,14 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShaderWithDXC(u32 shader_m
|
||||||
static constexpr const wchar_t* nondebug_arguments[] = {
|
static constexpr const wchar_t* nondebug_arguments[] = {
|
||||||
L"-Qstrip_reflect",
|
L"-Qstrip_reflect",
|
||||||
L"-Qstrip_debug",
|
L"-Qstrip_debug",
|
||||||
|
DXC_ARG_PACK_MATRIX_ROW_MAJOR,
|
||||||
DXC_ARG_OPTIMIZATION_LEVEL3,
|
DXC_ARG_OPTIMIZATION_LEVEL3,
|
||||||
};
|
};
|
||||||
static constexpr const wchar_t* debug_arguments[] = {
|
static constexpr const wchar_t* debug_arguments[] = {
|
||||||
L"-Qstrip_reflect",
|
L"-Qstrip_reflect",
|
||||||
DXC_ARG_DEBUG,
|
DXC_ARG_DEBUG,
|
||||||
L"-Qembed_debug",
|
L"-Qembed_debug",
|
||||||
|
DXC_ARG_PACK_MATRIX_ROW_MAJOR,
|
||||||
DXC_ARG_SKIP_OPTIMIZATIONS,
|
DXC_ARG_SKIP_OPTIMIZATIONS,
|
||||||
};
|
};
|
||||||
const wchar_t* const* arguments = debug_device ? debug_arguments : nondebug_arguments;
|
const wchar_t* const* arguments = debug_device ? debug_arguments : nondebug_arguments;
|
||||||
|
|
|
@ -677,17 +677,9 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
|
||||||
SetPipeline(m_imgui_pipeline.get());
|
SetPipeline(m_imgui_pipeline.get());
|
||||||
SetViewportAndScissor(0, 0, swap_chain->GetWidth(), swap_chain->GetHeight());
|
SetViewportAndScissor(0, 0, swap_chain->GetWidth(), swap_chain->GetHeight());
|
||||||
|
|
||||||
const float L = 0.0f;
|
const GSMatrix4x4 mproj = GSMatrix4x4::OffCenterOrthographicProjection(
|
||||||
const float R = static_cast<float>(swap_chain->GetWidth());
|
0.0f, 0.0f, static_cast<float>(swap_chain->GetWidth()), static_cast<float>(swap_chain->GetHeight()), 0.0f, 1.0f);
|
||||||
const float T = 0.0f;
|
PushUniformBuffer(&mproj, sizeof(mproj));
|
||||||
const float B = static_cast<float>(swap_chain->GetHeight());
|
|
||||||
const float ortho_projection[4][4] = {
|
|
||||||
{2.0f / (R - L), 0.0f, 0.0f, 0.0f},
|
|
||||||
{0.0f, 2.0f / (T - B), 0.0f, 0.0f},
|
|
||||||
{0.0f, 0.0f, 0.5f, 0.0f},
|
|
||||||
{(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f},
|
|
||||||
};
|
|
||||||
PushUniformBuffer(ortho_projection, sizeof(ortho_projection));
|
|
||||||
|
|
||||||
// Render command lists
|
// Render command lists
|
||||||
const bool flip = UsesLowerLeftOrigin();
|
const bool flip = UsesLowerLeftOrigin();
|
||||||
|
@ -708,20 +700,11 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
|
||||||
if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y)
|
if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y)
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
GSVector4i clip = GSVector4i(GSVector4::load<false>(&pcmd->ClipRect.x));
|
||||||
if (flip)
|
if (flip)
|
||||||
{
|
clip = FlipToLowerLeft(clip, swap_chain->GetHeight());
|
||||||
const s32 height = static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y);
|
|
||||||
const s32 flipped_y = static_cast<s32>(swap_chain->GetHeight()) - static_cast<s32>(pcmd->ClipRect.y) - height;
|
|
||||||
SetScissor(static_cast<s32>(pcmd->ClipRect.x), flipped_y, static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
|
|
||||||
height);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y),
|
|
||||||
static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
|
|
||||||
static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y));
|
|
||||||
}
|
|
||||||
|
|
||||||
|
SetScissor(clip);
|
||||||
SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get());
|
SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get());
|
||||||
DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset);
|
DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset);
|
||||||
}
|
}
|
||||||
|
|
|
@ -357,20 +357,20 @@ void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_c
|
||||||
{
|
{
|
||||||
if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
|
if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
|
||||||
{
|
{
|
||||||
ss << "layout(push_constant) uniform PushConstants\n";
|
ss << "layout(push_constant, row_major) uniform PushConstants\n";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n";
|
ss << "layout(std140, row_major, set = 0, binding = 0) uniform UBOBlock\n";
|
||||||
m_has_uniform_buffer = true;
|
m_has_uniform_buffer = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else if (m_glsl)
|
else if (m_glsl)
|
||||||
{
|
{
|
||||||
if (m_use_glsl_binding_layout)
|
if (m_use_glsl_binding_layout)
|
||||||
ss << "layout(std140, binding = 0) uniform UBOBlock\n";
|
ss << "layout(std140, row_major, binding = 0) uniform UBOBlock\n";
|
||||||
else
|
else
|
||||||
ss << "layout(std140) uniform UBOBlock\n";
|
ss << "layout(std140, row_major) uniform UBOBlock\n";
|
||||||
|
|
||||||
m_has_uniform_buffer = true;
|
m_has_uniform_buffer = true;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue