diff --git a/pcsx2/GS/GSExtra.h b/pcsx2/GS/GSExtra.h index c59828f2e4..42a0ab071c 100644 --- a/pcsx2/GS/GSExtra.h +++ b/pcsx2/GS/GSExtra.h @@ -35,6 +35,64 @@ inline std::wstring convert_utf8_to_utf16(const std::string& utf8_string) } #endif +/// Like `memcmp(&a, &b, sizeof(T)) == 0` but faster +template +__forceinline bool BitEqual(const T& a, const T& b) +{ +#if _M_SSE >= 0x501 + if (alignof(T) >= 32) + { + GSVector8i eq = GSVector8i::xffffffff(); + for (size_t i = 0; i < sizeof(T) / 32; i++) + eq &= reinterpret_cast(&a)[i].eq8(reinterpret_cast(&b)[i]); + return eq.alltrue(); + } +#endif + GSVector4i eq = GSVector4i::xffffffff(); + if (alignof(T) >= 16) + { + for (size_t i = 0; i < sizeof(T) / 16; i++) + eq &= reinterpret_cast(&a)[i].eq8(reinterpret_cast(&b)[i]); + return eq.alltrue(); + } + const char* ac = reinterpret_cast(&a); + const char* bc = reinterpret_cast(&b); + size_t i = 0; + if (sizeof(T) >= 16) + { + for (; i < sizeof(T) - 15; i += 16) + eq &= GSVector4i::load(ac + i).eq8(GSVector4i::load(bc + i)); + } + if (i + 8 <= sizeof(T)) + { + eq &= GSVector4i::loadl(ac + i).eq8(GSVector4i::loadl(bc + i)); + i += 8; + } + bool eqb = eq.alltrue(); + if (i + 4 <= sizeof(T)) + { + u32 ai, bi; + memcpy(&ai, ac + i, sizeof(ai)); + memcpy(&bi, bc + i, sizeof(bi)); + eqb = ai == bi && eqb; + i += 4; + } + if (i + 2 <= sizeof(T)) + { + u16 as, bs; + memcpy(&as, ac + i, sizeof(as)); + memcpy(&bs, bc + i, sizeof(bs)); + eqb = as == bs && eqb; + i += 2; + } + if (i != sizeof(T)) + { + ASSERT(i + 1 == sizeof(T)); + eqb = ac[i] == bc[i] && eqb; + } + return eqb; +} + // _wfopen has to be used on Windows for pathnames containing non-ASCII characters. inline FILE* px_fopen(const std::string& filename, const std::string& mode) { diff --git a/pcsx2/GS/Renderers/Common/GSDevice.h b/pcsx2/GS/Renderers/Common/GSDevice.h index 7af1c279d9..ceacfea6df 100644 --- a/pcsx2/GS/Renderers/Common/GSDevice.h +++ b/pcsx2/GS/Renderers/Common/GSDevice.h @@ -20,6 +20,7 @@ #include "GSTexture.h" #include "GSVertex.h" #include "GS/GSAlignedClass.h" +#include "GS/GSExtra.h" #include "GSOsdManager.h" #include #ifdef _WIN32 @@ -328,7 +329,7 @@ struct GSHWDrawConfig ColorMaskSelector(): key(0xF) {} ColorMaskSelector(u32 c): key(0) { wrgba = c; } }; - struct VSConstantBuffer + struct alignas(16) VSConstantBuffer { GSVector2 vertex_scale; GSVector2 vertex_offset; @@ -336,8 +337,29 @@ struct GSHWDrawConfig GSVector2 texture_offset; GSVector2 point_size; GSVector2i max_depth; + VSConstantBuffer() + { + memset(this, 0, sizeof(*this)); + } + VSConstantBuffer(const VSConstantBuffer& other) + { + memcpy(this, &other, sizeof(*this)); + } + VSConstantBuffer& operator=(const VSConstantBuffer& other) + { + new (this) VSConstantBuffer(other); + return *this; + } + bool operator==(const VSConstantBuffer& other) const + { + return BitEqual(*this, other); + } + bool operator!=(const VSConstantBuffer& other) const + { + return !(*this == other); + } }; - struct PSConstantBuffer + struct alignas(16) PSConstantBuffer { union { @@ -396,6 +418,27 @@ struct GSHWDrawConfig GSVector4 half_texel; GSVector4 uv_min_max; + PSConstantBuffer() + { + memset(this, 0, sizeof(*this)); + } + PSConstantBuffer(const PSConstantBuffer& other) + { + memcpy(this, &other, sizeof(*this)); + } + PSConstantBuffer& operator=(const PSConstantBuffer& other) + { + new (this) PSConstantBuffer(other); + return *this; + } + bool operator==(const PSConstantBuffer& other) const + { + return BitEqual(*this, other); + } + bool operator!=(const PSConstantBuffer& other) const + { + return !(*this == other); + } }; struct BlendState { @@ -466,10 +509,10 @@ struct GSHWDrawConfig struct AlphaSecondPass { bool enable; - PSSelector ps; - PSConstantBuffer cb_ps; ColorMaskSelector colormask; DepthStencilSelector depth; + PSSelector ps; + PSConstantBuffer cb_ps; } alpha_second_pass; }; diff --git a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp index bc5d73dd21..b38dda4531 100644 --- a/pcsx2/GS/Renderers/DX11/GSDevice11.cpp +++ b/pcsx2/GS/Renderers/DX11/GSDevice11.cpp @@ -1704,7 +1704,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config) if (config.alpha_second_pass.enable) { - if (0 != memcmp(&config.cb_ps, &config.alpha_second_pass.cb_ps, sizeof(config.cb_ps))) + if (config.cb_ps != config.alpha_second_pass.cb_ps) { cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst); } diff --git a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp index fd74a4e918..cea1876d96 100644 --- a/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp +++ b/pcsx2/GS/Renderers/OpenGL/GSDeviceOGL.cpp @@ -2199,7 +2199,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config) if (config.alpha_second_pass.enable) { - if (0 != memcmp(&config.cb_ps, &config.alpha_second_pass.cb_ps, sizeof(config.cb_ps))) + if (config.cb_ps != config.alpha_second_pass.cb_ps) { cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst); SetupCB(&cb_vs, &cb_ps);