GS: Less memcmp

`memcmp` has to do a lot of extra work to produce its fancier three-way return value, when all these call sites need is an equality check.
This commit is contained in:
TellowKrinkle 2021-12-12 21:55:33 -06:00 committed by lightningterror
parent 8395e00835
commit 1e7e23df96
4 changed files with 107 additions and 6 deletions

View File

@ -35,6 +35,64 @@ inline std::wstring convert_utf8_to_utf16(const std::string& utf8_string)
}
#endif
/// Bitwise equality test for two objects of the same type.
/// Gives the same answer as `memcmp(&a, &b, sizeof(T)) == 0`, but is intended to be faster
/// since it only has to report equal / not-equal.
/// All sizeof(T) bytes of both objects are compared, exactly as memcmp would.
template <typename T>
__forceinline bool BitEqual(const T& a, const T& b)
{
#if _M_SSE >= 0x501
	// Types aligned to at least 32 bytes: compare in 32-byte GSVector8i chunks.
	if (alignof(T) >= 32)
	{
		const GSVector8i* lhs256 = reinterpret_cast<const GSVector8i*>(&a);
		const GSVector8i* rhs256 = reinterpret_cast<const GSVector8i*>(&b);
		GSVector8i mask256 = GSVector8i::xffffffff();
		for (size_t chunk = 0; chunk < sizeof(T) / 32; chunk++)
			mask256 &= lhs256[chunk].eq8(rhs256[chunk]);
		return mask256.alltrue();
	}
#endif

	// Accumulated per-byte comparison result; starts out as "everything matched".
	GSVector4i mask128 = GSVector4i::xffffffff();

	// Types aligned to at least 16 bytes: compare in 16-byte GSVector4i chunks.
	if (alignof(T) >= 16)
	{
		const GSVector4i* lhs128 = reinterpret_cast<const GSVector4i*>(&a);
		const GSVector4i* rhs128 = reinterpret_cast<const GSVector4i*>(&b);
		for (size_t chunk = 0; chunk < sizeof(T) / 16; chunk++)
			mask128 &= lhs128[chunk].eq8(rhs128[chunk]);
		return mask128.alltrue();
	}

	// Lower alignment: walk the raw bytes using progressively smaller pieces
	// (16, then at most one each of 8, 4, 2 and 1 bytes).
	const char* bytes_a = reinterpret_cast<const char*>(&a);
	const char* bytes_b = reinterpret_cast<const char*>(&b);
	size_t pos = 0;

	// As many full 16-byte pieces as fit.
	// NOTE(review): load<false> is presumably the unaligned load variant - confirm in GSVector4i.
	while (pos + 16 <= sizeof(T))
	{
		mask128 &= GSVector4i::load<false>(bytes_a + pos).eq8(GSVector4i::load<false>(bytes_b + pos));
		pos += 16;
	}

	// One 8-byte piece, if it fits.
	// NOTE(review): loadl presumably reads 8 bytes into the low half of the vector - confirm.
	if (pos + 8 <= sizeof(T))
	{
		mask128 &= GSVector4i::loadl(bytes_a + pos).eq8(GSVector4i::loadl(bytes_b + pos));
		pos += 8;
	}

	// Collapse the vector result to a bool; the remaining tail is compared with scalars.
	bool equal = mask128.alltrue();

	// One 4-byte piece, read through memcpy so no alignment is assumed.
	if (pos + 4 <= sizeof(T))
	{
		u32 word_a, word_b;
		memcpy(&word_a, bytes_a + pos, sizeof(word_a));
		memcpy(&word_b, bytes_b + pos, sizeof(word_b));
		equal = equal && word_a == word_b;
		pos += 4;
	}

	// One 2-byte piece.
	if (pos + 2 <= sizeof(T))
	{
		u16 half_a, half_b;
		memcpy(&half_a, bytes_a + pos, sizeof(half_a));
		memcpy(&half_b, bytes_b + pos, sizeof(half_b));
		equal = equal && half_a == half_b;
		pos += 2;
	}

	// At most a single byte can be left at this point.
	if (pos != sizeof(T))
	{
		ASSERT(pos + 1 == sizeof(T));
		equal = equal && bytes_a[pos] == bytes_b[pos];
	}

	return equal;
}
// _wfopen has to be used on Windows for pathnames containing non-ASCII characters.
inline FILE* px_fopen(const std::string& filename, const std::string& mode)
{

View File

@ -20,6 +20,7 @@
#include "GSTexture.h"
#include "GSVertex.h"
#include "GS/GSAlignedClass.h"
#include "GS/GSExtra.h"
#include "GSOsdManager.h"
#include <array>
#ifdef _WIN32
@ -328,7 +329,7 @@ struct GSHWDrawConfig
// Default selector: key starts out as 0xF.
// NOTE(review): presumably this means all four wrgba channels are enabled - confirm against the union layout.
ColorMaskSelector()
	: key(0xF)
{
}
// Build a selector from a mask value: clear the whole key first, then store `c` in wrgba.
ColorMaskSelector(u32 c)
	: key(0)
{
	wrgba = c;
}
};
struct VSConstantBuffer
struct alignas(16) VSConstantBuffer
{
GSVector2 vertex_scale;
GSVector2 vertex_offset;
@ -336,8 +337,29 @@ struct GSHWDrawConfig
GSVector2 texture_offset;
GSVector2 point_size;
GSVector2i max_depth;
VSConstantBuffer()
{
memset(this, 0, sizeof(*this));
}
VSConstantBuffer(const VSConstantBuffer& other)
{
memcpy(this, &other, sizeof(*this));
}
// Copy-assign by re-constructing this object in place from `other` (the copy constructor
// performs a raw byte copy).
// Self-assignment is skipped: it would memcpy the object onto itself, and memcpy with
// overlapping source and destination is undefined behavior.
VSConstantBuffer& operator=(const VSConstantBuffer& other)
{
	if (this != &other)
		new (this) VSConstantBuffer(other);
	return *this;
}
// Two buffers are equal when BitEqual reports that their bytes match.
bool operator==(const VSConstantBuffer& other) const
{
	const bool identical = BitEqual(*this, other);
	return identical;
}
// Exact negation of operator==.
bool operator!=(const VSConstantBuffer& other) const
{
	const bool same = (*this == other);
	return !same;
}
};
struct PSConstantBuffer
struct alignas(16) PSConstantBuffer
{
union
{
@ -396,6 +418,27 @@ struct GSHWDrawConfig
GSVector4 half_texel;
GSVector4 uv_min_max;
PSConstantBuffer()
{
memset(this, 0, sizeof(*this));
}
PSConstantBuffer(const PSConstantBuffer& other)
{
memcpy(this, &other, sizeof(*this));
}
// Copy-assign by re-constructing this object in place from `other` (the copy constructor
// performs a raw byte copy).
// Self-assignment is skipped: it would memcpy the object onto itself, and memcpy with
// overlapping source and destination is undefined behavior.
PSConstantBuffer& operator=(const PSConstantBuffer& other)
{
	if (this != &other)
		new (this) PSConstantBuffer(other);
	return *this;
}
// Two buffers are equal when BitEqual reports that their bytes match.
bool operator==(const PSConstantBuffer& other) const
{
	const bool identical = BitEqual(*this, other);
	return identical;
}
// Exact negation of operator==.
bool operator!=(const PSConstantBuffer& other) const
{
	const bool same = (*this == other);
	return !same;
}
};
struct BlendState
{
@ -466,10 +509,10 @@ struct GSHWDrawConfig
// Settings for the optional second alpha pass. The RenderHW implementations check `enable`
// and, when it is set, read `cb_ps` and `ps` from here.
// NOTE(review): `ps` and `cb_ps` are listed twice below - this looks like the removed and
// added lines of a diff shown together; only one pair can remain in the real struct.
// Confirm which position is current.
struct AlphaSecondPass
{
bool enable;
PSSelector ps;
PSConstantBuffer cb_ps;
ColorMaskSelector colormask;
DepthStencilSelector depth;
PSSelector ps;
PSConstantBuffer cb_ps;
} alpha_second_pass;
};

View File

@ -1704,7 +1704,7 @@ void GSDevice11::RenderHW(GSHWDrawConfig& config)
if (config.alpha_second_pass.enable)
{
if (0 != memcmp(&config.cb_ps, &config.alpha_second_pass.cb_ps, sizeof(config.cb_ps)))
if (config.cb_ps != config.alpha_second_pass.cb_ps)
{
cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst);
}

View File

@ -2199,7 +2199,7 @@ void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
if (config.alpha_second_pass.enable)
{
if (0 != memcmp(&config.cb_ps, &config.alpha_second_pass.cb_ps, sizeof(config.cb_ps)))
if (config.cb_ps != config.alpha_second_pass.cb_ps)
{
cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst);
SetupCB(&cb_vs, &cb_ps);