GPUDevice: Use row-major matrix packing

Matrices are now packed row-major and used with column vectors, so mul() compiles to dot products instead of madds.
This commit is contained in:
Stenzek 2024-11-13 14:35:13 +10:00
parent e22d67f4aa
commit acf04ed67a
No known key found for this signature in database
4 changed files with 16 additions and 30 deletions

View File

@ -5,4 +5,4 @@
#include "common/types.h" #include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 22; static constexpr u32 SHADER_CACHE_VERSION = 23;

View File

@ -475,8 +475,9 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShaderWithFXC(u32 shader_m
return {}; return {};
} }
static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3; static constexpr UINT flags_non_debug = D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_OPTIMIZATION_LEVEL3;
static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG; static constexpr UINT flags_debug =
D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG;
Microsoft::WRL::ComPtr<ID3DBlob> blob; Microsoft::WRL::ComPtr<ID3DBlob> blob;
Microsoft::WRL::ComPtr<ID3DBlob> error_blob; Microsoft::WRL::ComPtr<ID3DBlob> error_blob;
@ -556,12 +557,14 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShaderWithDXC(u32 shader_m
static constexpr const wchar_t* nondebug_arguments[] = { static constexpr const wchar_t* nondebug_arguments[] = {
L"-Qstrip_reflect", L"-Qstrip_reflect",
L"-Qstrip_debug", L"-Qstrip_debug",
DXC_ARG_PACK_MATRIX_ROW_MAJOR,
DXC_ARG_OPTIMIZATION_LEVEL3, DXC_ARG_OPTIMIZATION_LEVEL3,
}; };
static constexpr const wchar_t* debug_arguments[] = { static constexpr const wchar_t* debug_arguments[] = {
L"-Qstrip_reflect", L"-Qstrip_reflect",
DXC_ARG_DEBUG, DXC_ARG_DEBUG,
L"-Qembed_debug", L"-Qembed_debug",
DXC_ARG_PACK_MATRIX_ROW_MAJOR,
DXC_ARG_SKIP_OPTIMIZATIONS, DXC_ARG_SKIP_OPTIMIZATIONS,
}; };
const wchar_t* const* arguments = debug_device ? debug_arguments : nondebug_arguments; const wchar_t* const* arguments = debug_device ? debug_arguments : nondebug_arguments;

View File

@ -677,17 +677,9 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
SetPipeline(m_imgui_pipeline.get()); SetPipeline(m_imgui_pipeline.get());
SetViewportAndScissor(0, 0, swap_chain->GetWidth(), swap_chain->GetHeight()); SetViewportAndScissor(0, 0, swap_chain->GetWidth(), swap_chain->GetHeight());
const float L = 0.0f; const GSMatrix4x4 mproj = GSMatrix4x4::OffCenterOrthographicProjection(
const float R = static_cast<float>(swap_chain->GetWidth()); 0.0f, 0.0f, static_cast<float>(swap_chain->GetWidth()), static_cast<float>(swap_chain->GetHeight()), 0.0f, 1.0f);
const float T = 0.0f; PushUniformBuffer(&mproj, sizeof(mproj));
const float B = static_cast<float>(swap_chain->GetHeight());
const float ortho_projection[4][4] = {
{2.0f / (R - L), 0.0f, 0.0f, 0.0f},
{0.0f, 2.0f / (T - B), 0.0f, 0.0f},
{0.0f, 0.0f, 0.5f, 0.0f},
{(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f},
};
PushUniformBuffer(ortho_projection, sizeof(ortho_projection));
// Render command lists // Render command lists
const bool flip = UsesLowerLeftOrigin(); const bool flip = UsesLowerLeftOrigin();
@ -708,20 +700,11 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y) if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y)
continue; continue;
GSVector4i clip = GSVector4i(GSVector4::load<false>(&pcmd->ClipRect.x));
if (flip) if (flip)
{ clip = FlipToLowerLeft(clip, swap_chain->GetHeight());
const s32 height = static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y);
const s32 flipped_y = static_cast<s32>(swap_chain->GetHeight()) - static_cast<s32>(pcmd->ClipRect.y) - height;
SetScissor(static_cast<s32>(pcmd->ClipRect.x), flipped_y, static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
height);
}
else
{
SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y),
static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y));
}
SetScissor(clip);
SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get()); SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get());
DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset); DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset);
} }

View File

@ -357,20 +357,20 @@ void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_c
{ {
if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan) if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
{ {
ss << "layout(push_constant) uniform PushConstants\n"; ss << "layout(push_constant, row_major) uniform PushConstants\n";
} }
else else
{ {
ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n"; ss << "layout(std140, row_major, set = 0, binding = 0) uniform UBOBlock\n";
m_has_uniform_buffer = true; m_has_uniform_buffer = true;
} }
} }
else if (m_glsl) else if (m_glsl)
{ {
if (m_use_glsl_binding_layout) if (m_use_glsl_binding_layout)
ss << "layout(std140, binding = 0) uniform UBOBlock\n"; ss << "layout(std140, row_major, binding = 0) uniform UBOBlock\n";
else else
ss << "layout(std140) uniform UBOBlock\n"; ss << "layout(std140, row_major) uniform UBOBlock\n";
m_has_uniform_buffer = true; m_has_uniform_buffer = true;
} }