GPUDevice: Use row-major matrix packing

With row-major packing and column vectors, mul() turns into dot products instead of madds.
This commit is contained in:
Stenzek 2024-11-13 14:35:13 +10:00
parent e22d67f4aa
commit acf04ed67a
No known key found for this signature in database
4 changed files with 16 additions and 30 deletions

View File

@ -5,4 +5,4 @@
#include "common/types.h"
static constexpr u32 SHADER_CACHE_VERSION = 22;
static constexpr u32 SHADER_CACHE_VERSION = 23;

View File

@ -475,8 +475,9 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShaderWithFXC(u32 shader_m
return {};
}
static constexpr UINT flags_non_debug = D3DCOMPILE_OPTIMIZATION_LEVEL3;
static constexpr UINT flags_debug = D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG;
static constexpr UINT flags_non_debug = D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_OPTIMIZATION_LEVEL3;
static constexpr UINT flags_debug =
D3DCOMPILE_PACK_MATRIX_ROW_MAJOR | D3DCOMPILE_SKIP_OPTIMIZATION | D3DCOMPILE_DEBUG;
Microsoft::WRL::ComPtr<ID3DBlob> blob;
Microsoft::WRL::ComPtr<ID3DBlob> error_blob;
@ -556,12 +557,14 @@ std::optional<DynamicHeapArray<u8>> D3DCommon::CompileShaderWithDXC(u32 shader_m
static constexpr const wchar_t* nondebug_arguments[] = {
L"-Qstrip_reflect",
L"-Qstrip_debug",
DXC_ARG_PACK_MATRIX_ROW_MAJOR,
DXC_ARG_OPTIMIZATION_LEVEL3,
};
static constexpr const wchar_t* debug_arguments[] = {
L"-Qstrip_reflect",
DXC_ARG_DEBUG,
L"-Qembed_debug",
DXC_ARG_PACK_MATRIX_ROW_MAJOR,
DXC_ARG_SKIP_OPTIMIZATIONS,
};
const wchar_t* const* arguments = debug_device ? debug_arguments : nondebug_arguments;

View File

@ -677,17 +677,9 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
SetPipeline(m_imgui_pipeline.get());
SetViewportAndScissor(0, 0, swap_chain->GetWidth(), swap_chain->GetHeight());
const float L = 0.0f;
const float R = static_cast<float>(swap_chain->GetWidth());
const float T = 0.0f;
const float B = static_cast<float>(swap_chain->GetHeight());
const float ortho_projection[4][4] = {
{2.0f / (R - L), 0.0f, 0.0f, 0.0f},
{0.0f, 2.0f / (T - B), 0.0f, 0.0f},
{0.0f, 0.0f, 0.5f, 0.0f},
{(R + L) / (L - R), (T + B) / (B - T), 0.5f, 1.0f},
};
PushUniformBuffer(ortho_projection, sizeof(ortho_projection));
const GSMatrix4x4 mproj = GSMatrix4x4::OffCenterOrthographicProjection(
0.0f, 0.0f, static_cast<float>(swap_chain->GetWidth()), static_cast<float>(swap_chain->GetHeight()), 0.0f, 1.0f);
PushUniformBuffer(&mproj, sizeof(mproj));
// Render command lists
const bool flip = UsesLowerLeftOrigin();
@ -708,20 +700,11 @@ void GPUDevice::RenderImGui(GPUSwapChain* swap_chain)
if (pcmd->ElemCount == 0 || pcmd->ClipRect.z <= pcmd->ClipRect.x || pcmd->ClipRect.w <= pcmd->ClipRect.y)
continue;
GSVector4i clip = GSVector4i(GSVector4::load<false>(&pcmd->ClipRect.x));
if (flip)
{
const s32 height = static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y);
const s32 flipped_y = static_cast<s32>(swap_chain->GetHeight()) - static_cast<s32>(pcmd->ClipRect.y) - height;
SetScissor(static_cast<s32>(pcmd->ClipRect.x), flipped_y, static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
height);
}
else
{
SetScissor(static_cast<s32>(pcmd->ClipRect.x), static_cast<s32>(pcmd->ClipRect.y),
static_cast<s32>(pcmd->ClipRect.z - pcmd->ClipRect.x),
static_cast<s32>(pcmd->ClipRect.w - pcmd->ClipRect.y));
}
clip = FlipToLowerLeft(clip, swap_chain->GetHeight());
SetScissor(clip);
SetTextureSampler(0, reinterpret_cast<GPUTexture*>(pcmd->TextureId), m_linear_sampler.get());
DrawIndexed(pcmd->ElemCount, base_index + pcmd->IdxOffset, base_vertex + pcmd->VtxOffset);
}

View File

@ -357,20 +357,20 @@ void ShaderGen::WriteUniformBufferDeclaration(std::stringstream& ss, bool push_c
{
if (m_render_api == RenderAPI::Vulkan && push_constant_on_vulkan)
{
ss << "layout(push_constant) uniform PushConstants\n";
ss << "layout(push_constant, row_major) uniform PushConstants\n";
}
else
{
ss << "layout(std140, set = 0, binding = 0) uniform UBOBlock\n";
ss << "layout(std140, row_major, set = 0, binding = 0) uniform UBOBlock\n";
m_has_uniform_buffer = true;
}
}
else if (m_glsl)
{
if (m_use_glsl_binding_layout)
ss << "layout(std140, binding = 0) uniform UBOBlock\n";
ss << "layout(std140, row_major, binding = 0) uniform UBOBlock\n";
else
ss << "layout(std140) uniform UBOBlock\n";
ss << "layout(std140, row_major) uniform UBOBlock\n";
m_has_uniform_buffer = true;
}