mirror of https://github.com/PCSX2/pcsx2.git
GS: Make g_const constant initialized
We never took advantage of the ability to relocate the pointer
This commit is contained in:
parent
66133b8b21
commit
bc74c9064a
|
@ -95,11 +95,6 @@ int GSinit()
|
||||||
|
|
||||||
GSUtil::Init();
|
GSUtil::Init();
|
||||||
|
|
||||||
if (g_const == nullptr)
|
|
||||||
return -1;
|
|
||||||
else
|
|
||||||
g_const->Init();
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -18,7 +18,7 @@
|
||||||
#include "GSTextureCacheSW.h"
|
#include "GSTextureCacheSW.h"
|
||||||
|
|
||||||
// Lack of a better home
|
// Lack of a better home
|
||||||
std::unique_ptr<GSScanlineConstantData> g_const(new GSScanlineConstantData());
|
constexpr GSScanlineConstantData g_const;
|
||||||
|
|
||||||
GSDrawScanline::GSDrawScanline()
|
GSDrawScanline::GSDrawScanline()
|
||||||
: m_sp_map("GSSetupPrim", &m_local)
|
: m_sp_map("GSSetupPrim", &m_local)
|
||||||
|
@ -131,10 +131,10 @@ void GSDrawScanline::CSetupPrim(const GSVertexSW* vertex, const u32* index, cons
|
||||||
constexpr int vlen = sizeof(VectorF) / sizeof(float);
|
constexpr int vlen = sizeof(VectorF) / sizeof(float);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
const GSVector8* shift = (GSVector8*)g_const->m_shift_256b;
|
const GSVector8* shift = (GSVector8*)g_const.m_shift_256b;
|
||||||
const GSVector4 step_shift = GSVector4::broadcast32(&shift[0]);
|
const GSVector4 step_shift = GSVector4::broadcast32(&shift[0]);
|
||||||
#else
|
#else
|
||||||
const GSVector4* shift = (GSVector4*)g_const->m_shift_128b;
|
const GSVector4* shift = (GSVector4*)g_const.m_shift_128b;
|
||||||
const GSVector4 step_shift = shift[0];
|
const GSVector4 step_shift = shift[0];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
@ -308,7 +308,7 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
|
||||||
constexpr int vlen = sizeof(VectorF) / sizeof(float);
|
constexpr int vlen = sizeof(VectorF) / sizeof(float);
|
||||||
|
|
||||||
#if _M_SSE < 0x501
|
#if _M_SSE < 0x501
|
||||||
const GSVector4i* const_test = (GSVector4i*)g_const->m_test_128b;
|
const GSVector4i* const_test = (GSVector4i*)g_const.m_test_128b;
|
||||||
#endif
|
#endif
|
||||||
VectorI test;
|
VectorI test;
|
||||||
VectorF z0, z1;
|
VectorF z0, z1;
|
||||||
|
@ -328,7 +328,7 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
|
||||||
steps = pixels + skip - vlen;
|
steps = pixels + skip - vlen;
|
||||||
left -= skip;
|
left -= skip;
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
test = GSVector8i::i8to32(g_const->m_test_256b[skip]) | GSVector8i::i8to32(g_const->m_test_256b[15 + (steps & (steps >> 31))]);
|
test = GSVector8i::i8to32(g_const.m_test_256b[skip]) | GSVector8i::i8to32(g_const.m_test_256b[15 + (steps & (steps >> 31))]);
|
||||||
#else
|
#else
|
||||||
test = const_test[skip] | const_test[7 + (steps & (steps >> 31))];
|
test = const_test[skip] | const_test[7 + (steps & (steps >> 31))];
|
||||||
#endif
|
#endif
|
||||||
|
@ -1569,7 +1569,7 @@ void GSDrawScanline::CDrawScanline(int pixels, int left, int top, const GSVertex
|
||||||
if (!sel.notest)
|
if (!sel.notest)
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
test = GSVector8i::i8to32(g_const->m_test_256b[15 + (steps & (steps >> 31))]);
|
test = GSVector8i::i8to32(g_const.m_test_256b[15 + (steps & (steps >> 31))]);
|
||||||
#else
|
#else
|
||||||
test = const_test[7 + (steps & (steps >> 31))];
|
test = const_test[7 + (steps & (steps >> 31))];
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -95,7 +95,7 @@ GSDrawScanlineCodeGenerator2::GSDrawScanlineCodeGenerator2(Xbyak::CodeGenerator*
|
||||||
, t0(r8) , t1(r9)
|
, t0(r8) , t1(r9)
|
||||||
, t2(rcx), t3(rsi)
|
, t2(rcx), t3(rsi)
|
||||||
#endif
|
#endif
|
||||||
, _g_const(chooseLocal(&*g_const, _64_g_const))
|
, _g_const(chooseLocal(&g_const, _64_g_const))
|
||||||
, _m_local(chooseLocal(&m_local, _64_m_local))
|
, _m_local(chooseLocal(&m_local, _64_m_local))
|
||||||
, _m_local__gd(chooseLocal(m_local.gd, _64_m_local__gd))
|
, _m_local__gd(chooseLocal(m_local.gd, _64_m_local__gd))
|
||||||
, _m_local__gd__vm(chooseLocal(m_local.gd->vm, _64_m_local__gd__vm))
|
, _m_local__gd__vm(chooseLocal(m_local.gd->vm, _64_m_local__gd__vm))
|
||||||
|
@ -366,7 +366,7 @@ void GSDrawScanlineCodeGenerator2::Generate()
|
||||||
mov(ptr[rsp + _64_rz_r14], r14);
|
mov(ptr[rsp + _64_rz_r14], r14);
|
||||||
mov(ptr[rsp + _64_rz_r15], r15);
|
mov(ptr[rsp + _64_rz_r15], r15);
|
||||||
#endif
|
#endif
|
||||||
mov(_64_g_const, (size_t)&*g_const);
|
mov(_64_g_const, (size_t)&g_const);
|
||||||
if (!m_rip)
|
if (!m_rip)
|
||||||
{
|
{
|
||||||
mov(_64_m_local, (size_t)&m_local);
|
mov(_64_m_local, (size_t)&m_local);
|
||||||
|
|
|
@ -233,84 +233,61 @@ struct alignas(32) GSScanlineLocalData // per prim variables, each thread has it
|
||||||
};
|
};
|
||||||
|
|
||||||
// Constant shared by all threads (to reduce cache miss)
|
// Constant shared by all threads (to reduce cache miss)
|
||||||
//
|
|
||||||
// Note: Avoid GSVector* to support all ISA at once
|
|
||||||
//
|
|
||||||
// WARNING: Don't use static storage. Static variables are relocated to a random
|
|
||||||
// location (above 2GB). Small allocation on the heap could be below 2GB, this way we can use
|
|
||||||
// absolute addressing. Otherwise we need to store a base address in a register.
|
|
||||||
struct GSScanlineConstantData : public GSAlignedClass<32>
|
struct GSScanlineConstantData : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
alignas(32) u8 m_test_256b[16][8];
|
alignas(32) u8 m_test_256b[16][8] = {
|
||||||
alignas(32) float m_shift_256b[9][8];
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
alignas(32) float m_log2_coef_256b[4][8];
|
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
|
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
|
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
|
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||||
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||||
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||||
|
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||||
|
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||||
|
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||||
|
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||||
|
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
|
||||||
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
|
||||||
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
|
||||||
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
|
||||||
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
|
};
|
||||||
|
alignas(32) float m_shift_256b[9][8] = {
|
||||||
|
{ 8.0f , 8.0f , 8.0f , 8.0f , 8.0f , 8.0f , 8.0f , 8.0f},
|
||||||
|
{ 0.0f , 1.0f , 2.0f , 3.0f , 4.0f , 5.0f , 6.0f , 7.0f},
|
||||||
|
{ -1.0f , 0.0f , 1.0f , 2.0f , 3.0f , 4.0f , 5.0f , 6.0f},
|
||||||
|
{ -2.0f , -1.0f , 0.0f , 1.0f , 2.0f , 3.0f , 4.0f , 5.0f},
|
||||||
|
{ -3.0f , -2.0f , -1.0f , 0.0f , 1.0f , 2.0f , 3.0f , 4.0f},
|
||||||
|
{ -4.0f , -3.0f , -2.0f , -1.0f , 0.0f , 1.0f , 2.0f , 3.0f},
|
||||||
|
{ -5.0f , -4.0f , -3.0f , -2.0f , -1.0f , 0.0f , 1.0f , 2.0f},
|
||||||
|
{ -6.0f , -5.0f , -4.0f , -3.0f , -2.0f , -1.0f , 0.0f , 1.0f},
|
||||||
|
{ -7.0f , -6.0f , -5.0f , -4.0f , -3.0f , -2.0f , -1.0f , 0.0f},
|
||||||
|
};
|
||||||
|
alignas(32) float m_log2_coef_256b[4][8] = {};
|
||||||
|
|
||||||
alignas(16) u32 m_test_128b[8][4];
|
alignas(16) u32 m_test_128b[8][4] = {
|
||||||
alignas(16) float m_shift_128b[5][4];
|
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
|
||||||
alignas(16) float m_log2_coef_128b[4][4];
|
{0xffffffff, 0x00000000, 0x00000000, 0x00000000},
|
||||||
|
{0xffffffff, 0xffffffff, 0x00000000, 0x00000000},
|
||||||
|
{0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
|
||||||
|
{0x00000000, 0xffffffff, 0xffffffff, 0xffffffff},
|
||||||
|
{0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
|
||||||
|
{0x00000000, 0x00000000, 0x00000000, 0xffffffff},
|
||||||
|
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
|
||||||
|
};
|
||||||
|
alignas(16) float m_shift_128b[5][4] = {
|
||||||
|
{ 4.0f , 4.0f , 4.0f , 4.0f},
|
||||||
|
{ 0.0f , 1.0f , 2.0f , 3.0f},
|
||||||
|
{ -1.0f , 0.0f , 1.0f , 2.0f},
|
||||||
|
{ -2.0f , -1.0f , 0.0f , 1.0f},
|
||||||
|
{ -3.0f , -2.0f , -1.0f , 0.0f},
|
||||||
|
};
|
||||||
|
alignas(16) float m_log2_coef_128b[4][4] = {};
|
||||||
|
|
||||||
GSScanlineConstantData() {}
|
constexpr GSScanlineConstantData()
|
||||||
|
|
||||||
// GCC will be clever enough to stick some AVX instruction here
|
|
||||||
// So it must be deferred until after the global constructors have run
|
|
||||||
void Init()
|
|
||||||
{
|
{
|
||||||
u8 I_hate_vs2013_m_test_256b[16][8] = {
|
constexpr float log2_coef[] = {
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
|
||||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
|
||||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
|
||||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
|
||||||
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
|
||||||
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
|
||||||
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
|
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}
|
|
||||||
};
|
|
||||||
|
|
||||||
u32 I_hate_vs2013_m_test_128b[8][4] = {
|
|
||||||
{0x00000000, 0x00000000, 0x00000000, 0x00000000},
|
|
||||||
{0xffffffff, 0x00000000, 0x00000000, 0x00000000},
|
|
||||||
{0xffffffff, 0xffffffff, 0x00000000, 0x00000000},
|
|
||||||
{0xffffffff, 0xffffffff, 0xffffffff, 0x00000000},
|
|
||||||
{0x00000000, 0xffffffff, 0xffffffff, 0xffffffff},
|
|
||||||
{0x00000000, 0x00000000, 0xffffffff, 0xffffffff},
|
|
||||||
{0x00000000, 0x00000000, 0x00000000, 0xffffffff},
|
|
||||||
{0x00000000, 0x00000000, 0x00000000, 0x00000000}
|
|
||||||
};
|
|
||||||
|
|
||||||
float I_hate_vs2013_m_shift_256b[9][8] = {
|
|
||||||
{ 8.0f , 8.0f , 8.0f , 8.0f , 8.0f , 8.0f , 8.0f , 8.0f},
|
|
||||||
{ 0.0f , 1.0f , 2.0f , 3.0f , 4.0f , 5.0f , 6.0f , 7.0f},
|
|
||||||
{ -1.0f , 0.0f , 1.0f , 2.0f , 3.0f , 4.0f , 5.0f , 6.0f},
|
|
||||||
{ -2.0f , -1.0f , 0.0f , 1.0f , 2.0f , 3.0f , 4.0f , 5.0f},
|
|
||||||
{ -3.0f , -2.0f , -1.0f , 0.0f , 1.0f , 2.0f , 3.0f , 4.0f},
|
|
||||||
{ -4.0f , -3.0f , -2.0f , -1.0f , 0.0f , 1.0f , 2.0f , 3.0f},
|
|
||||||
{ -5.0f , -4.0f , -3.0f , -2.0f , -1.0f , 0.0f , 1.0f , 2.0f},
|
|
||||||
{ -6.0f , -5.0f , -4.0f , -3.0f , -2.0f , -1.0f , 0.0f , 1.0f},
|
|
||||||
{ -7.0f , -6.0f , -5.0f , -4.0f , -3.0f , -2.0f , -1.0f , 0.0f}
|
|
||||||
};
|
|
||||||
|
|
||||||
float I_hate_vs2013_m_shift_128b[5][4] = {
|
|
||||||
{ 4.0f , 4.0f , 4.0f , 4.0f},
|
|
||||||
{ 0.0f , 1.0f , 2.0f , 3.0f},
|
|
||||||
{ -1.0f , 0.0f , 1.0f , 2.0f},
|
|
||||||
{ -2.0f , -1.0f , 0.0f , 1.0f},
|
|
||||||
{ -3.0f , -2.0f , -1.0f , 0.0f}
|
|
||||||
};
|
|
||||||
|
|
||||||
memcpy(m_test_256b, I_hate_vs2013_m_test_256b, sizeof(I_hate_vs2013_m_test_256b));
|
|
||||||
memcpy(m_test_128b, I_hate_vs2013_m_test_128b, sizeof(I_hate_vs2013_m_test_128b));
|
|
||||||
memcpy(m_shift_256b, I_hate_vs2013_m_shift_256b, sizeof(I_hate_vs2013_m_shift_256b));
|
|
||||||
memcpy(m_shift_128b, I_hate_vs2013_m_shift_128b, sizeof(I_hate_vs2013_m_shift_128b));
|
|
||||||
|
|
||||||
float log2_coef[] = {
|
|
||||||
0.204446009836232697516f,
|
0.204446009836232697516f,
|
||||||
-1.04913055217340124191f,
|
-1.04913055217340124191f,
|
||||||
2.28330284476918490682f,
|
2.28330284476918490682f,
|
||||||
|
@ -329,4 +306,4 @@ struct GSScanlineConstantData : public GSAlignedClass<32>
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
extern std::unique_ptr<GSScanlineConstantData> g_const;
|
extern const GSScanlineConstantData g_const;
|
||||||
|
|
|
@ -122,9 +122,9 @@ void GSSetupPrimCodeGenerator2::Generate()
|
||||||
{
|
{
|
||||||
|
|
||||||
if (isXmm)
|
if (isXmm)
|
||||||
mov(rax, (size_t)g_const->m_shift_128b);
|
mov(rax, (size_t)g_const.m_shift_128b);
|
||||||
else
|
else
|
||||||
mov(rax, (size_t)g_const->m_shift_256b);
|
mov(rax, (size_t)g_const.m_shift_256b);
|
||||||
|
|
||||||
for (int i = 0; i < (m_sel.notest ? 2 : many_regs ? 9 : 5); i++)
|
for (int i = 0; i < (m_sel.notest ? 2 : many_regs ? 9 : 5); i++)
|
||||||
{
|
{
|
||||||
|
@ -269,7 +269,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
vmulps(ymm0, Ymm(4 + i), ymm1);
|
vmulps(ymm0, Ymm(4 + i), ymm1);
|
||||||
else
|
else
|
||||||
vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm0, ymm1, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
cvttps2dq(ymm0, ymm0);
|
cvttps2dq(ymm0, ymm0);
|
||||||
pshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
pshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
pshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
pshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||||
|
@ -297,7 +297,7 @@ void GSSetupPrimCodeGenerator2::Depth_YMM()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
vmulps(ymm1, Ymm(4 + i), ymm0);
|
vmulps(ymm1, Ymm(4 + i), ymm0);
|
||||||
else
|
else
|
||||||
vmulps(ymm1, ymm0, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm1, ymm0, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
movaps(_rip_local(d[i].z), ymm1);
|
movaps(_rip_local(d[i].z), ymm1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -372,7 +372,7 @@ void GSSetupPrimCodeGenerator2::Texture()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
THREEARG(mulps, xym2, XYm(4 + i), xym1);
|
THREEARG(mulps, xym2, XYm(4 + i), xym1);
|
||||||
else
|
else
|
||||||
vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm2, ymm1, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
|
|
||||||
if (m_sel.fst)
|
if (m_sel.fst)
|
||||||
{
|
{
|
||||||
|
@ -440,7 +440,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
THREEARG(mulps, xym0, XYm(4 + i), xym2);
|
THREEARG(mulps, xym0, XYm(4 + i), xym2);
|
||||||
else
|
else
|
||||||
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm0, ymm2, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
cvttps2dq(xym0, xym0);
|
cvttps2dq(xym0, xym0);
|
||||||
packssdw(xym0, xym0);
|
packssdw(xym0, xym0);
|
||||||
|
|
||||||
|
@ -449,7 +449,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
THREEARG(mulps, xym1, XYm(4 + i), xym3);
|
THREEARG(mulps, xym1, XYm(4 + i), xym3);
|
||||||
else
|
else
|
||||||
vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm1, ymm3, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
cvttps2dq(xym1, xym1);
|
cvttps2dq(xym1, xym1);
|
||||||
packssdw(xym1, xym1);
|
packssdw(xym1, xym1);
|
||||||
|
|
||||||
|
@ -476,7 +476,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
THREEARG(mulps, xym0, XYm(4 + i), xym2);
|
THREEARG(mulps, xym0, XYm(4 + i), xym2);
|
||||||
else
|
else
|
||||||
vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm0, ymm2, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
cvttps2dq(xym0, xym0);
|
cvttps2dq(xym0, xym0);
|
||||||
packssdw(xym0, xym0);
|
packssdw(xym0, xym0);
|
||||||
|
|
||||||
|
@ -485,7 +485,7 @@ void GSSetupPrimCodeGenerator2::Color()
|
||||||
if (i < 4 || many_regs)
|
if (i < 4 || many_regs)
|
||||||
THREEARG(mulps, xym1, XYm(4 + i), xym3);
|
THREEARG(mulps, xym1, XYm(4 + i), xym3);
|
||||||
else
|
else
|
||||||
vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
|
vmulps(ymm1, ymm3, ptr[g_const.m_shift_256b[i + 1]]);
|
||||||
cvttps2dq(xym1, xym1);
|
cvttps2dq(xym1, xym1);
|
||||||
packssdw(xym1, xym1);
|
packssdw(xym1, xym1);
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue