mirror of https://github.com/PCSX2/pcsx2.git
gsdx sw: port code to the new constant object
This commit is contained in:
parent
3b5bc9c38d
commit
8431299b92
|
@ -131,12 +131,10 @@ EXPORT_C_(int) GSinit()
|
|||
|
||||
GSBlock::InitVectors();
|
||||
GSClut::InitVectors();
|
||||
GSDrawScanlineCodeGenerator::InitVectors();
|
||||
#ifdef ENABLE_OPENCL
|
||||
GSRendererCL::InitVectors();
|
||||
#endif
|
||||
GSRendererSW::InitVectors();
|
||||
GSSetupPrimCodeGenerator::InitVectors();
|
||||
GSVector4i::InitVectors();
|
||||
GSVector4::InitVectors();
|
||||
#if _M_SSE >= 0x500
|
||||
|
@ -147,6 +145,9 @@ EXPORT_C_(int) GSinit()
|
|||
#endif
|
||||
GSVertexTrace::InitVectors();
|
||||
|
||||
if (g_const == nullptr)
|
||||
return -1;
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
||||
|
@ -165,8 +166,7 @@ EXPORT_C GSshutdown()
|
|||
gsopen_done = false;
|
||||
|
||||
delete s_gs;
|
||||
|
||||
s_gs = NULL;
|
||||
s_gs = nullptr;
|
||||
|
||||
s_renderer = GSRendererType::Undefined;
|
||||
|
||||
|
|
|
@ -117,7 +117,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
const GSVector8* shift = GSSetupPrimCodeGenerator::m_shift;
|
||||
const GSVector8* shift = (GSVector8*)g_const->m_shift_256b;
|
||||
|
||||
if(has_z || has_f)
|
||||
{
|
||||
|
@ -271,7 +271,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co
|
|||
|
||||
#else
|
||||
|
||||
const GSVector4* shift = GSSetupPrimCodeGenerator::m_shift;
|
||||
const GSVector4* shift = (GSVector4*)g_const->m_shift_128b;
|
||||
|
||||
if(has_z || has_f)
|
||||
{
|
||||
|
@ -441,7 +441,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
skip = left & 7;
|
||||
steps = pixels + skip - 8;
|
||||
left -= skip;
|
||||
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[skip]) | GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
|
||||
test = GSVector8i::i8to32c(g_const->m_test_256b[skip]) | GSVector8i::i8to32c(g_const->m_test_256b[15 + (steps & (steps >> 31))]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1532,12 +1532,13 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if(!sel.notest)
|
||||
{
|
||||
test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]);
|
||||
test = GSVector8i::i8to32c(g_const->m_test_256b[15 + (steps & (steps >> 31))]);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
const GSVector4i* const_test = (GSVector4i*)g_const->m_test_128b;
|
||||
GSVector4i test;
|
||||
GSVector4 zo;
|
||||
GSVector4i f;
|
||||
|
@ -1555,7 +1556,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
skip = left & 3;
|
||||
steps = pixels + skip - 4;
|
||||
left -= skip;
|
||||
test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
test = const_test[skip] | const_test[7 + (steps & (steps >> 31))];
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -2625,7 +2626,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS
|
|||
|
||||
if(!sel.notest)
|
||||
{
|
||||
test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
test = const_test[7 + (steps & (steps >> 31))];
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -33,78 +33,6 @@ void GSDrawScanlineCodeGenerator::Generate()
|
|||
}
|
||||
#endif
|
||||
|
||||
// Static constant tables of GSDrawScanlineCodeGenerator. The layout depends on
// the SIMD level selected at compile time through _M_SSE.
#if _M_SSE >= 0x501
|
||||
|
||||
// Byte-per-pixel edge masks for the 8-pixel-wide path (16 rows of 8 bytes).
// Rows 0..7: the first `row` bytes are 0xff, the rest 0x00.
// Rows 8..15: the first `row - 7` bytes are 0x00, the rest 0xff (row 15 is all zero).
// DrawScanline ORs row [skip] with row [15 + min(steps, 0)], and the generated
// code loads a row with vpmovsxbd, i.e. each byte is sign-extended to a 32-bit lane.
// NOTE(review): 0xff presumably marks a lane that lies outside the span - confirm.
alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] =
|
||||
{
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00},
|
||||
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00},
|
||||
{0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff},
|
||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
};
|
||||
|
||||
// Storage for the four 256-bit coefficient vectors; filled in by InitVectors().
GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4];
|
||||
#else
|
||||
// 128-bit variants: both tables are plain storage here and are filled in by InitVectors().
GSVector4i GSDrawScanlineCodeGenerator::m_test[8];
|
||||
GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4];
|
||||
#endif
|
||||
|
||||
// Fills the static constant tables of GSDrawScanlineCodeGenerator at runtime.
// Each table is first built as a local array and then copied element by element
// into the matching static member. With _M_SSE >= 0x501 only m_log2_coef is
// written (m_test is a compile-time initialised byte table on that path);
// otherwise both m_test and m_log2_coef are written.
void GSDrawScanlineCodeGenerator::InitVectors()
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
// Entries 0..2 are the c0, c1, c2 constants and entry 3 is 1.0f, as consumed by
// the SampleTextureLOD code: ((c0 * x + c1) * x + c2) * (x - 1.0f) + base.
// NOTE(review): presumably a polynomial approximation of log2 (going by the name) - confirm.
GSVector8 log2_coef[4] =
|
||||
{
|
||||
GSVector8(0.204446009836232697516f),
|
||||
GSVector8(-1.04913055217340124191f),
|
||||
GSVector8(2.28330284476918490682f),
|
||||
GSVector8(1.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(log2_coef); ++n)
|
||||
m_log2_coef[n] = log2_coef[n];
|
||||
|
||||
#else
|
||||
// Edge masks for the 4-pixel-wide path, one 32-bit value per pixel.
// Entries 0..3 have the first `index` arguments set to 0xffffffff; entries 4..7
// have the first `index - 3` arguments cleared and the rest set (entry 7 is zero).
// DrawScanline ORs entry [skip] with entry [7 + min(steps, 0)].
GSVector4i test[8] =
|
||||
{
|
||||
GSVector4i::zero(),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector4i::zero(),
|
||||
};
|
||||
|
||||
// Same four constants as the 256-bit path, held in 128-bit vectors.
GSVector4 log2_coef[4] =
|
||||
{
|
||||
GSVector4(0.204446009836232697516f),
|
||||
GSVector4(-1.04913055217340124191f),
|
||||
GSVector4(2.28330284476918490682f),
|
||||
GSVector4(1.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(test); ++n)
|
||||
m_test[n] = test[n];
|
||||
|
||||
for (size_t n = 0; n < countof(log2_coef); ++n)
|
||||
m_log2_coef[n] = log2_coef[n];
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_local(*(GSScanlineLocalData*)param)
|
||||
|
|
|
@ -143,14 +143,4 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
alignas(8) static const uint8 m_test[16][8];
|
||||
static GSVector8 m_log2_coef[4];
|
||||
#else
|
||||
static GSVector4i m_test[8];
|
||||
static GSVector4 m_log2_coef[4];
|
||||
#endif
|
||||
|
||||
static void InitVectors();
|
||||
};
|
||||
|
|
|
@ -96,7 +96,7 @@ void GSDrawScanlineCodeGenerator::Generate_AVX()
|
|||
}
|
||||
#endif
|
||||
|
||||
mov(r10, (size_t)&m_test[0]);
|
||||
mov(r10, (size_t)g_const->m_test_128b[0]);
|
||||
if (!m_rip)
|
||||
{
|
||||
mov(_m_local, (size_t)&m_local);
|
||||
|
|
|
@ -103,7 +103,7 @@ void GSDrawScanlineCodeGenerator::Generate()
|
|||
}
|
||||
#endif
|
||||
|
||||
mov(r10, (size_t)&m_test[0]);
|
||||
mov(r10, (size_t)g_const->m_test_256b[0]);
|
||||
if (!m_rip)
|
||||
{
|
||||
mov(_m_local, (size_t)&m_local);
|
||||
|
@ -363,8 +363,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]);
|
||||
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]);
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[0]]);
|
||||
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)g_const->m_test_256b[15]]);
|
||||
vpor(ymm7, ymm0);
|
||||
|
||||
shl(edx, 5);
|
||||
|
@ -683,7 +683,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]);
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[15]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1261,25 +1261,25 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
vpslld(ymm4, ymm4, 9);
|
||||
vpsrld(ymm4, ymm4, 9);
|
||||
vorps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vorps(ymm4, ptr[g_const->m_log2_coef_256b[3]]);
|
||||
|
||||
// ymm4 = mant(q) | 1.0f
|
||||
|
||||
if(m_cpu.has(util::Cpu::tFMA))
|
||||
{
|
||||
vmovaps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0
|
||||
vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * ymm4 + c1
|
||||
vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * ymm4 + c1) * ymm4 + c2
|
||||
vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // ymm4 - 1.0f
|
||||
vmovaps(ymm5, ptr[g_const->m_log2_coef_256b[0]]); // c0
|
||||
vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[1]]); // c0 * ymm4 + c1
|
||||
vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[2]]); // (c0 * ymm4 + c1) * ymm4 + c2
|
||||
vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); // ymm4 - 1.0f
|
||||
vfmadd213ps(ymm4, ymm5, ymm0); // ((c0 * ymm4 + c1) * ymm4 + c2) * (ymm4 - 1.0f) + ymm0
|
||||
}
|
||||
else
|
||||
{
|
||||
vmulps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
|
||||
vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
|
||||
vmulps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[0]]);
|
||||
vaddps(ymm5, ptr[g_const->m_log2_coef_256b[1]]);
|
||||
vmulps(ymm5, ymm4);
|
||||
vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
|
||||
vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]);
|
||||
vaddps(ymm5, ptr[g_const->m_log2_coef_256b[2]]);
|
||||
vmulps(ymm4, ymm5);
|
||||
vaddps(ymm4, ymm0);
|
||||
}
|
||||
|
|
|
@ -269,14 +269,14 @@ void GSDrawScanlineCodeGenerator::Init_AVX()
|
|||
|
||||
shl(edx, 4);
|
||||
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[0]]);
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
|
||||
vpor(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
vpor(xmm7, ptr[eax + (size_t)g_const->m_test_128b[7]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -591,7 +591,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX()
|
|||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
vmovdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[7]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1147,25 +1147,25 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX()
|
|||
|
||||
vpslld(xmm4, xmm4, 9);
|
||||
vpsrld(xmm4, xmm4, 9);
|
||||
vorps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vorps(xmm4, ptr[g_const->m_log2_coef_128b[3]]);
|
||||
|
||||
// xmm4 = mant(q) | 1.0f
|
||||
|
||||
if(m_cpu.has(util::Cpu::tFMA))
|
||||
{
|
||||
vmovaps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0
|
||||
vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * xmm4 + c1
|
||||
vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * xmm4 + c1) * xmm4 + c2
|
||||
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // xmm4 - 1.0f
|
||||
vmovaps(xmm5, ptr[g_const->m_log2_coef_128b[0]]); // c0
|
||||
vfmadd213ps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[1]]); // c0 * xmm4 + c1
|
||||
vfmadd213ps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[2]]); // (c0 * xmm4 + c1) * xmm4 + c2
|
||||
vsubps(xmm4, ptr[g_const->m_log2_coef_128b[3]]); // xmm4 - 1.0f
|
||||
vfmadd213ps(xmm4, xmm5, xmm0); // ((c0 * xmm4 + c1) * xmm4 + c2) * (xmm4 - 1.0f) + xmm0
|
||||
}
|
||||
else
|
||||
{
|
||||
vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
|
||||
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
|
||||
vmulps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[0]]);
|
||||
vaddps(xmm5, ptr[g_const->m_log2_coef_128b[1]]);
|
||||
vmulps(xmm5, xmm4);
|
||||
vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
|
||||
vsubps(xmm4, ptr[g_const->m_log2_coef_128b[3]]);
|
||||
vaddps(xmm5, ptr[g_const->m_log2_coef_128b[2]]);
|
||||
vmulps(xmm4, xmm5);
|
||||
vaddps(xmm4, xmm0);
|
||||
}
|
||||
|
|
|
@ -273,8 +273,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]);
|
||||
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]);
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[0]]);
|
||||
vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)g_const->m_test_256b[15]]);
|
||||
vpor(ymm7, ymm0);
|
||||
|
||||
shl(edx, 5);
|
||||
|
@ -593,7 +593,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
sar(edx, 31);
|
||||
and(edx, ecx);
|
||||
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]);
|
||||
vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[15]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1163,25 +1163,25 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
vpslld(ymm4, ymm4, 9);
|
||||
vpsrld(ymm4, ymm4, 9);
|
||||
vorps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vorps(ymm4, ptr[g_const->m_log2_coef_256b[3]]);
|
||||
|
||||
// ymm4 = mant(q) | 1.0f
|
||||
|
||||
if(m_cpu.has(util::Cpu::tFMA))
|
||||
{
|
||||
vmovaps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0
|
||||
vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * ymm4 + c1
|
||||
vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * ymm4 + c1) * ymm4 + c2
|
||||
vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // ymm4 - 1.0f
|
||||
vmovaps(ymm5, ptr[g_const->m_log2_coef_256b[0]]); // c0
|
||||
vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[1]]); // c0 * ymm4 + c1
|
||||
vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[2]]); // (c0 * ymm4 + c1) * ymm4 + c2
|
||||
vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); // ymm4 - 1.0f
|
||||
vfmadd213ps(ymm4, ymm5, ymm0); // ((c0 * ymm4 + c1) * ymm4 + c2) * (ymm4 - 1.0f) + ymm0
|
||||
}
|
||||
else
|
||||
{
|
||||
vmulps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
|
||||
vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
|
||||
vmulps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[0]]);
|
||||
vaddps(ymm5, ptr[g_const->m_log2_coef_256b[1]]);
|
||||
vmulps(ymm5, ymm4);
|
||||
vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
|
||||
vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]);
|
||||
vaddps(ymm5, ptr[g_const->m_log2_coef_256b[2]]);
|
||||
vmulps(ymm4, ymm5);
|
||||
vaddps(ymm4, ymm0);
|
||||
}
|
||||
|
|
|
@ -269,14 +269,14 @@ void GSDrawScanlineCodeGenerator::Init_SSE()
|
|||
|
||||
shl(edx, 4);
|
||||
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]);
|
||||
movdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[0]]);
|
||||
|
||||
mov(eax, ecx);
|
||||
sar(eax, 31);
|
||||
and(eax, ecx);
|
||||
shl(eax, 4);
|
||||
|
||||
por(xmm7, ptr[eax + (size_t)&m_test[7]]);
|
||||
por(xmm7, ptr[eax + (size_t)g_const->m_test_128b[7]]);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -596,7 +596,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE()
|
|||
and(edx, ecx);
|
||||
shl(edx, 4);
|
||||
|
||||
movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]);
|
||||
movdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[7]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1189,16 +1189,16 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE()
|
|||
|
||||
pslld(xmm4, 9);
|
||||
psrld(xmm4, 9);
|
||||
orps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
orps(xmm4, ptr[g_const->m_log2_coef_128b[3]]);
|
||||
|
||||
// xmm4 = mant(q) | 1.0f
|
||||
|
||||
movdqa(xmm5, xmm4);
|
||||
mulps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]);
|
||||
addps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]);
|
||||
mulps(xmm5, ptr[g_const->m_log2_coef_128b[0]]);
|
||||
addps(xmm5, ptr[g_const->m_log2_coef_128b[1]]);
|
||||
mulps(xmm5, xmm4);
|
||||
subps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]);
|
||||
addps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]);
|
||||
subps(xmm4, ptr[g_const->m_log2_coef_128b[3]]);
|
||||
addps(xmm5, ptr[g_const->m_log2_coef_128b[2]]);
|
||||
mulps(xmm4, xmm5);
|
||||
addps(xmm4, xmm0);
|
||||
|
||||
|
|
|
@ -24,46 +24,6 @@
|
|||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Storage for the per-pixel shift table of GSSetupPrimCodeGenerator; the values
// are written by InitVectors(). The table has 9 eight-float vectors when
// _M_SSE >= 0x501 and 5 four-float vectors otherwise.
#if _M_SSE >= 0x501
|
||||
GSVector8 GSSetupPrimCodeGenerator::m_shift[9];
|
||||
#else
|
||||
GSVector4 GSSetupPrimCodeGenerator::m_shift[5];
|
||||
#endif
|
||||
|
||||
// Fills the static m_shift table at runtime by building a local array and
// copying it element by element.
// Entry 0 holds the vector width (8.0f or 4.0f) in every element; entry 1 + i
// holds the ramp (0, 1, 2, ...) with i subtracted from every element, which the
// setup code multiplies with per-pixel deltas (e.g. "dz * shift[1 + i]").
void GSSetupPrimCodeGenerator::InitVectors()
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
// 8-wide table: one broadcast entry followed by eight shifted ramps.
GSVector8 shift[9] =
|
||||
{
|
||||
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
|
||||
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
|
||||
GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f),
|
||||
GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f),
|
||||
GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f),
|
||||
GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f),
|
||||
GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(shift); ++n)
|
||||
m_shift[n] = shift[n];
|
||||
|
||||
#else
|
||||
// 4-wide table: one broadcast entry followed by four shifted ramps.
GSVector4 shift[5] =
|
||||
{
|
||||
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
|
||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
|
||||
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(shift); ++n)
|
||||
m_shift[n] = shift[n];
|
||||
#endif
|
||||
}
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
, m_local(*(GSScanlineLocalData*)param)
|
||||
|
|
|
@ -54,12 +54,4 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
static GSVector8 m_shift[9];
|
||||
#else
|
||||
static GSVector4 m_shift[5];
|
||||
#endif
|
||||
|
||||
static void InitVectors();
|
||||
};
|
||||
|
|
|
@ -48,7 +48,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX()
|
|||
|
||||
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
||||
{
|
||||
mov(rax, (size_t)&m_shift[0]);
|
||||
mov(rax, (size_t)g_const->m_shift_128b);
|
||||
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
|
|
|
@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX2()
|
|||
|
||||
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
||||
{
|
||||
mov(rax, (size_t)&m_shift[0]);
|
||||
mov(rax, (size_t)g_const->m_shift_256b);
|
||||
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 9); i++)
|
||||
{
|
||||
|
|
|
@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_SSE()
|
|||
|
||||
if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip)
|
||||
{
|
||||
mov(rax, (size_t)&m_shift[0]);
|
||||
mov(rax, (size_t)g_const->m_shift_128b[0]);
|
||||
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
|
|
|
@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX()
|
|||
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
vmovaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
vmovaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX2()
|
|||
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
vmovaps(Ymm(3 + i), ptr[&m_shift[i]]);
|
||||
vmovaps(Ymm(3 + i), ptr[g_const->m_shift_256b[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2()
|
|||
// m_local.d[i].z = dz * shift[1 + i];
|
||||
|
||||
if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i));
|
||||
else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
vmovaps(ptr[&m_local.d[i].z], ymm0);
|
||||
}
|
||||
|
||||
|
@ -113,7 +113,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2()
|
|||
// m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh();
|
||||
|
||||
if(i < 4) vmulps(ymm0, ymm1, Ymm(4 + i));
|
||||
else vmulps(ymm0, ymm1, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
vcvttps2dq(ymm0, ymm0);
|
||||
vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0));
|
||||
|
@ -190,7 +190,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX2()
|
|||
// GSVector8 v = dstq * shift[1 + i];
|
||||
|
||||
if(i < 4) vmulps(ymm2, ymm1, Ymm(4 + i));
|
||||
else vmulps(ymm2, ymm1, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
|
||||
if(m_sel.fst)
|
||||
{
|
||||
|
@ -253,14 +253,14 @@ void GSSetupPrimCodeGenerator::Color_AVX2()
|
|||
// GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32();
|
||||
|
||||
if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i));
|
||||
else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
vcvttps2dq(ymm0, ymm0);
|
||||
vpackssdw(ymm0, ymm0);
|
||||
|
||||
// GSVector4i b = GSVector8i(db * shift[1 + i]).ps32();
|
||||
|
||||
if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i));
|
||||
else vmulps(ymm1, ymm3, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
vcvttps2dq(ymm1, ymm1);
|
||||
vpackssdw(ymm1, ymm1);
|
||||
|
||||
|
@ -285,14 +285,14 @@ void GSSetupPrimCodeGenerator::Color_AVX2()
|
|||
// GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32();
|
||||
|
||||
if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i));
|
||||
else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
vcvttps2dq(ymm0, ymm0);
|
||||
vpackssdw(ymm0, ymm0);
|
||||
|
||||
// GSVector8i a = GSVector8i(da * shift[1 + i]).ps32();
|
||||
|
||||
if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i));
|
||||
else vmulps(ymm1, ymm3, ptr[&m_shift[i + 1]]);
|
||||
else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]);
|
||||
vcvttps2dq(ymm1, ymm1);
|
||||
vpackssdw(ymm1, ymm1);
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_SSE()
|
|||
|
||||
for(int i = 0; i < (m_sel.notest ? 2 : 5); i++)
|
||||
{
|
||||
movaps(Xmm(3 + i), ptr[&m_shift[i]]);
|
||||
movaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in New Issue