From 8431299b92fcc13568f977b76838f91a9f0597b5 Mon Sep 17 00:00:00 2001 From: Gregory Hainaut Date: Thu, 24 Nov 2016 10:16:35 +0100 Subject: [PATCH] gsdx sw: port code to the new constant object --- plugins/GSdx/GS.cpp | 8 +-- plugins/GSdx/GSDrawScanline.cpp | 13 ++-- plugins/GSdx/GSDrawScanlineCodeGenerator.cpp | 72 ------------------- plugins/GSdx/GSDrawScanlineCodeGenerator.h | 10 --- .../GSDrawScanlineCodeGenerator.x64.avx.cpp | 2 +- .../GSDrawScanlineCodeGenerator.x64.avx2.cpp | 32 ++++----- .../GSDrawScanlineCodeGenerator.x86.avx.cpp | 30 ++++---- .../GSDrawScanlineCodeGenerator.x86.avx2.cpp | 32 ++++----- .../GSdx/GSDrawScanlineCodeGenerator.x86.cpp | 20 +++--- plugins/GSdx/GSSetupPrimCodeGenerator.cpp | 40 ----------- plugins/GSdx/GSSetupPrimCodeGenerator.h | 8 --- .../GSdx/GSSetupPrimCodeGenerator.x64.avx.cpp | 2 +- .../GSSetupPrimCodeGenerator.x64.avx2.cpp | 2 +- plugins/GSdx/GSSetupPrimCodeGenerator.x64.cpp | 2 +- .../GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp | 2 +- .../GSSetupPrimCodeGenerator.x86.avx2.cpp | 16 ++--- plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp | 2 +- 17 files changed, 82 insertions(+), 211 deletions(-) diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 2b5ebcbf5d..a9730d3ec2 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -131,12 +131,10 @@ EXPORT_C_(int) GSinit() GSBlock::InitVectors(); GSClut::InitVectors(); - GSDrawScanlineCodeGenerator::InitVectors(); #ifdef ENABLE_OPENCL GSRendererCL::InitVectors(); #endif GSRendererSW::InitVectors(); - GSSetupPrimCodeGenerator::InitVectors(); GSVector4i::InitVectors(); GSVector4::InitVectors(); #if _M_SSE >= 0x500 @@ -147,6 +145,9 @@ EXPORT_C_(int) GSinit() #endif GSVertexTrace::InitVectors(); + if (g_const == nullptr) + return -1; + #ifdef _WIN32 s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); @@ -165,8 +166,7 @@ EXPORT_C GSshutdown() gsopen_done = false; delete s_gs; - - s_gs = NULL; + s_gs = nullptr; s_renderer = GSRendererType::Undefined; diff --git a/plugins/GSdx/GSDrawScanline.cpp b/plugins/GSdx/GSDrawScanline.cpp index c65d2fb7c3..fd58ca9c1b 100644 --- a/plugins/GSdx/GSDrawScanline.cpp +++ b/plugins/GSdx/GSDrawScanline.cpp @@ -117,7 +117,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co #if _M_SSE >= 0x501 - const GSVector8* shift = GSSetupPrimCodeGenerator::m_shift; + const GSVector8* shift = (GSVector8*)g_const->m_shift_256b; if(has_z || has_f) { @@ -271,7 +271,7 @@ void GSDrawScanline::SetupPrim(const GSVertexSW* vertex, const uint32* index, co #else - const GSVector4* shift = GSSetupPrimCodeGenerator::m_shift; + const GSVector4* shift = (GSVector4*)g_const->m_shift_128b; if(has_z || has_f) { @@ -441,7 +441,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS skip = left & 7; steps = pixels + skip - 8; left -= skip; - test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[skip]) | GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]); + test = GSVector8i::i8to32c(g_const->m_test_256b[skip]) | GSVector8i::i8to32c(g_const->m_test_256b[15 + (steps & (steps >> 31))]); } else { @@ -1532,12 +1532,13 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if(!sel.notest) { - test = GSVector8i::i8to32c(GSDrawScanlineCodeGenerator::m_test[15 + (steps & (steps >> 31))]); + test = GSVector8i::i8to32c(g_const->m_test_256b[15 + (steps & (steps >> 31))]); } } #else + const GSVector4i* const_test = (GSVector4i*)g_const->m_test_128b; GSVector4i test; GSVector4 zo; GSVector4i f; @@ -1555,7 +1556,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS skip = left & 3; steps = pixels + skip - 4; left -= skip; - test = GSDrawScanlineCodeGenerator::m_test[skip] | GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; + test = const_test[skip] | const_test[7 + (steps & (steps >> 31))]; } else { @@ -2625,7 +2626,7 @@ void GSDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexS if(!sel.notest) { - test = GSDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))]; + test = const_test[7 + (steps & (steps >> 31))]; } } diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp index 536752a65a..df6391f10b 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp @@ -33,78 +33,6 @@ void GSDrawScanlineCodeGenerator::Generate() } #endif -#if _M_SSE >= 0x501 - -alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] = -{ - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0x00}, - {0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xff}, - {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, -}; - -GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4]; -#else -GSVector4i GSDrawScanlineCodeGenerator::m_test[8]; -GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4]; -#endif - -void GSDrawScanlineCodeGenerator::InitVectors() -{ -#if _M_SSE >= 0x501 - GSVector8 log2_coef[4] = - { - GSVector8(0.204446009836232697516f), - GSVector8(-1.04913055217340124191f), - GSVector8(2.28330284476918490682f), - GSVector8(1.0f), - }; - - for (size_t n = 0; n < countof(log2_coef); ++n) - m_log2_coef[n] = log2_coef[n]; - -#else - GSVector4i test[8] = - { - GSVector4i::zero(), - GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), - GSVector4i::zero(), - }; - - GSVector4 log2_coef[4] = - { - GSVector4(0.204446009836232697516f), - GSVector4(-1.04913055217340124191f), - GSVector4(2.28330284476918490682f), - GSVector4(1.0f), - }; - - for (size_t n = 0; n < countof(test); ++n) - m_test[n] = test[n]; - - for (size_t n = 0; n < countof(log2_coef); ++n) - m_log2_coef[n] = log2_coef[n]; - -#endif -} - GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) , m_local(*(GSScanlineLocalData*)param) diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.h b/plugins/GSdx/GSDrawScanlineCodeGenerator.h index 2db5658da4..754a13b639 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.h @@ -143,14 +143,4 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator public: GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); - - #if _M_SSE >= 0x501 - alignas(8) static const uint8 m_test[16][8]; - static GSVector8 m_log2_coef[4]; - #else - static GSVector4i m_test[8]; - static GSVector4 m_log2_coef[4]; - #endif - - static void InitVectors(); }; diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp index 68c11b294c..2e552cf039 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx.cpp @@ -96,7 +96,7 @@ void GSDrawScanlineCodeGenerator::Generate_AVX() } #endif - mov(r10, (size_t)&m_test[0]); + mov(r10, (size_t)g_const->m_test_128b[0]); if (!m_rip) { mov(_m_local, (size_t)&m_local); diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx2.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx2.cpp index 42c74fcb7a..804f861d37 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx2.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x64.avx2.cpp @@ -103,7 +103,7 @@ void GSDrawScanlineCodeGenerator::Generate() } #endif - mov(r10, (size_t)&m_test[0]); + mov(r10, (size_t)g_const->m_test_256b[0]); if (!m_rip) { mov(_m_local, (size_t)&m_local); @@ -363,8 +363,8 @@ void GSDrawScanlineCodeGenerator::Init() sar(eax, 31); and(eax, ecx); - vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]); - vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]); + vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[0]]); + vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)g_const->m_test_256b[15]]); vpor(ymm7, ymm0); shl(edx, 5); @@ -683,7 +683,7 @@ void GSDrawScanlineCodeGenerator::Step() sar(edx, 31); and(edx, ecx); - vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]); + vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[15]]); } } @@ -1255,31 +1255,31 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() vpslld(ymm0, ymm4, 1); vpsrld(ymm0, ymm0, 24); vpsubd(ymm0, ymm1); - vcvtdq2ps(ymm0, ymm0); + vcvtdq2ps(ymm0, ymm0); // ymm0 = (float)(exp(q) - 127) vpslld(ymm4, ymm4, 9); vpsrld(ymm4, ymm4, 9); - vorps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - + vorps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); + // ymm4 = mant(q) | 1.0f if(m_cpu.has(util::Cpu::tFMA)) { - vmovaps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0 - vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * ymm4 + c1 - vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * ymm4 + c1) * ymm4 + c2 - vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // ymm4 - 1.0f + vmovaps(ymm5, ptr[g_const->m_log2_coef_256b[0]]); // c0 + vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[1]]); // c0 * ymm4 + c1 + vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[2]]); // (c0 * ymm4 + c1) * ymm4 + c2 + vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); // ymm4 - 1.0f vfmadd213ps(ymm4, ymm5, ymm0); // ((c0 * ymm4 + c1) * ymm4 + c2) * (ymm4 - 1.0f) + ymm0 } else { - vmulps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); + vmulps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[0]]); + vaddps(ymm5, ptr[g_const->m_log2_coef_256b[1]]); vmulps(ymm5, ymm4); - vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); + vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); + vaddps(ymm5, ptr[g_const->m_log2_coef_256b[2]]); vmulps(ymm4, ymm5); vaddps(ymm4, ymm0); } @@ -1289,7 +1289,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() if(m_cpu.has(util::Cpu::tFMA)) { vmovaps(ymm5, ptr[&m_local.gd->l]); - vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); + vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); } else { diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp index aabfbd88c5..f885bf9925 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx.cpp @@ -269,14 +269,14 @@ void GSDrawScanlineCodeGenerator::Init_AVX() shl(edx, 4); - vmovdqa(xmm7, ptr[edx + (size_t)&m_test[0]]); + vmovdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[0]]); mov(eax, ecx); sar(eax, 31); and(eax, ecx); shl(eax, 4); - vpor(xmm7, ptr[eax + (size_t)&m_test[7]]); + vpor(xmm7, ptr[eax + (size_t)g_const->m_test_128b[7]]); } else { @@ -591,7 +591,7 @@ void GSDrawScanlineCodeGenerator::Step_AVX() and(edx, ecx); shl(edx, 4); - vmovdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); + vmovdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[7]]); } } @@ -1141,31 +1141,31 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() vpslld(xmm0, xmm4, 1); vpsrld(xmm0, xmm0, 24); vpsubd(xmm0, xmm1); - vcvtdq2ps(xmm0, xmm0); + vcvtdq2ps(xmm0, xmm0); // xmm0 = (float)(exp(q) - 127) vpslld(xmm4, xmm4, 9); vpsrld(xmm4, xmm4, 9); - vorps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - + vorps(xmm4, ptr[g_const->m_log2_coef_128b[3]]); + // xmm4 = mant(q) | 1.0f if(m_cpu.has(util::Cpu::tFMA)) { - vmovaps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0 - vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * xmm4 + c1 - vfmadd213ps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * xmm4 + c1) * xmm4 + c2 - vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // xmm4 - 1.0f + vmovaps(xmm5, ptr[g_const->m_log2_coef_128b[0]]); // c0 + vfmadd213ps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[1]]); // c0 * xmm4 + c1 + vfmadd213ps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[2]]); // (c0 * xmm4 + c1) * xmm4 + c2 + vsubps(xmm4, ptr[g_const->m_log2_coef_128b[3]]); // xmm4 - 1.0f vfmadd213ps(xmm4, xmm5, xmm0); // ((c0 * xmm4 + c1) * xmm4 + c2) * (xmm4 - 1.0f) + xmm0 } else { - vmulps(xmm5, xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); + vmulps(xmm5, xmm4, ptr[g_const->m_log2_coef_128b[0]]); + vaddps(xmm5, ptr[g_const->m_log2_coef_128b[1]]); vmulps(xmm5, xmm4); - vsubps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - vaddps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); + vsubps(xmm4, ptr[g_const->m_log2_coef_128b[3]]); + vaddps(xmm5, ptr[g_const->m_log2_coef_128b[2]]); vmulps(xmm4, xmm5); vaddps(xmm4, xmm0); } @@ -1175,7 +1175,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX() if(m_cpu.has(util::Cpu::tFMA)) { vmovaps(xmm5, ptr[&m_local.gd->l]); - vfmadd213ps(xmm4, xmm5, ptr[&m_local.gd->k]); + vfmadd213ps(xmm4, xmm5, ptr[&m_local.gd->k]); } else { diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp index 29a8b7130b..7d24272faa 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.avx2.cpp @@ -268,13 +268,13 @@ void GSDrawScanlineCodeGenerator::Init() sub(ebx, edx); // GSVector4i test = m_test[skip] | m_test[15 + (steps & (steps >> 31))]; - + mov(eax, ecx); sar(eax, 31); and(eax, ecx); - vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[0]]); - vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)&m_test[15]]); + vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[0]]); + vpmovsxbd(ymm0, ptr[eax * 8 + (size_t)g_const->m_test_256b[15]]); vpor(ymm7, ymm0); shl(edx, 5); @@ -593,7 +593,7 @@ void GSDrawScanlineCodeGenerator::Step() sar(edx, 31); and(edx, ecx); - vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)&m_test[15]]); + vpmovsxbd(ymm7, ptr[edx * 8 + (size_t)g_const->m_test_256b[15]]); } } @@ -1157,31 +1157,31 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() vpslld(ymm0, ymm4, 1); vpsrld(ymm0, ymm0, 24); vpsubd(ymm0, ymm1); - vcvtdq2ps(ymm0, ymm0); + vcvtdq2ps(ymm0, ymm0); // ymm0 = (float)(exp(q) - 127) vpslld(ymm4, ymm4, 9); vpsrld(ymm4, ymm4, 9); - vorps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - + vorps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); + // ymm4 = mant(q) | 1.0f if(m_cpu.has(util::Cpu::tFMA)) { - vmovaps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); // c0 - vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); // c0 * ymm4 + c1 - vfmadd213ps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); // (c0 * ymm4 + c1) * ymm4 + c2 - vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); // ymm4 - 1.0f + vmovaps(ymm5, ptr[g_const->m_log2_coef_256b[0]]); // c0 + vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[1]]); // c0 * ymm4 + c1 + vfmadd213ps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[2]]); // (c0 * ymm4 + c1) * ymm4 + c2 + vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); // ymm4 - 1.0f vfmadd213ps(ymm4, ymm5, ymm0); // ((c0 * ymm4 + c1) * ymm4 + c2) * (ymm4 - 1.0f) + ymm0 } else { - vmulps(ymm5, ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); + vmulps(ymm5, ymm4, ptr[g_const->m_log2_coef_256b[0]]); + vaddps(ymm5, ptr[g_const->m_log2_coef_256b[1]]); vmulps(ymm5, ymm4); - vsubps(ymm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - vaddps(ymm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); + vsubps(ymm4, ptr[g_const->m_log2_coef_256b[3]]); + vaddps(ymm5, ptr[g_const->m_log2_coef_256b[2]]); vmulps(ymm4, ymm5); vaddps(ymm4, ymm0); } @@ -1191,7 +1191,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD() if(m_cpu.has(util::Cpu::tFMA)) { vmovaps(ymm5, ptr[&m_local.gd->l]); - vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); + vfmadd213ps(ymm4, ymm5, ptr[&m_local.gd->k]); } else { diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp index b29a5eb3f4..851ca37854 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.x86.cpp @@ -269,14 +269,14 @@ void GSDrawScanlineCodeGenerator::Init_SSE() shl(edx, 4); - movdqa(xmm7, ptr[edx + (size_t)&m_test[0]]); + movdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[0]]); mov(eax, ecx); sar(eax, 31); and(eax, ecx); shl(eax, 4); - por(xmm7, ptr[eax + (size_t)&m_test[7]]); + por(xmm7, ptr[eax + (size_t)g_const->m_test_128b[7]]); } else { @@ -596,7 +596,7 @@ void GSDrawScanlineCodeGenerator::Step_SSE() and(edx, ecx); shl(edx, 4); - movdqa(xmm7, ptr[edx + (size_t)&m_test[7]]); + movdqa(xmm7, ptr[edx + (size_t)g_const->m_test_128b[7]]); } } @@ -1183,22 +1183,22 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE() pslld(xmm0, 1); psrld(xmm0, 24); psubd(xmm0, xmm1); - cvtdq2ps(xmm0, xmm0); + cvtdq2ps(xmm0, xmm0); // xmm0 = (float)(exp(q) - 127) pslld(xmm4, 9); psrld(xmm4, 9); - orps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - + orps(xmm4, ptr[g_const->m_log2_coef_128b[3]]); + // xmm4 = mant(q) | 1.0f movdqa(xmm5, xmm4); - mulps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[0]]); - addps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[1]]); + mulps(xmm5, ptr[g_const->m_log2_coef_128b[0]]); + addps(xmm5, ptr[g_const->m_log2_coef_128b[1]]); mulps(xmm5, xmm4); - subps(xmm4, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[3]]); - addps(xmm5, ptr[&GSDrawScanlineCodeGenerator::m_log2_coef[2]]); + subps(xmm4, ptr[g_const->m_log2_coef_128b[3]]); + addps(xmm5, ptr[g_const->m_log2_coef_128b[2]]); mulps(xmm4, xmm5); addps(xmm4, xmm0); diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp index de731c84fa..7ce66f2599 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp @@ -24,46 +24,6 @@ using namespace Xbyak; -#if _M_SSE >= 0x501 -GSVector8 GSSetupPrimCodeGenerator::m_shift[9]; -#else -GSVector4 GSSetupPrimCodeGenerator::m_shift[5]; -#endif - -void GSSetupPrimCodeGenerator::InitVectors() -{ -#if _M_SSE >= 0x501 - GSVector8 shift[9] = - { - GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f), - GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f), - GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f), - GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f), - GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f), - GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f), - GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f), - GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f), - GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f), - }; - - for (size_t n = 0; n < countof(shift); ++n) - m_shift[n] = shift[n]; - -#else - GSVector4 shift[5] = - { - GSVector4(4.0f, 4.0f, 4.0f, 4.0f), - GSVector4(0.0f, 1.0f, 2.0f, 3.0f), - GSVector4(-1.0f, 0.0f, 1.0f, 2.0f), - GSVector4(-2.0f, -1.0f, 0.0f, 1.0f), - GSVector4(-3.0f, -2.0f, -1.0f, 0.0f), - }; - - for (size_t n = 0; n < countof(shift); ++n) - m_shift[n] = shift[n]; -#endif -} - GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) , m_local(*(GSScanlineLocalData*)param) diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.h b/plugins/GSdx/GSSetupPrimCodeGenerator.h index dee01fddc7..928de3646d 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.h +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.h @@ -54,12 +54,4 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator public: GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); - - #if _M_SSE >= 0x501 - static GSVector8 m_shift[9]; - #else - static GSVector4 m_shift[5]; - #endif - - static void InitVectors(); }; diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx.cpp index e7113d00df..a42bf121d1 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx.cpp @@ -48,7 +48,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX() if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { - mov(rax, (size_t)&m_shift[0]); + mov(rax, (size_t)g_const->m_shift_128b); for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) { diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx2.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx2.cpp index 6b080b74b7..81ead23f0a 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx2.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x64.avx2.cpp @@ -53,7 +53,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX2() if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { - mov(rax, (size_t)&m_shift[0]); + mov(rax, (size_t)g_const->m_shift_256b); for(int i = 0; i < (m_sel.notest ? 2 : 9); i++) { diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x64.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x64.cpp index b4169e766f..76b5fbbf35 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x64.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x64.cpp @@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_SSE() if((m_en.z || m_en.f) && m_sel.prim != GS_SPRITE_CLASS || m_en.t || m_en.c && m_sel.iip) { - mov(rax, (size_t)&m_shift[0]); + mov(rax, (size_t)g_const->m_shift_128b[0]); for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) { diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp index f75ea3b6d2..fbaf27775d 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx.cpp @@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX() for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) { - vmovaps(Xmm(3 + i), ptr[&m_shift[i]]); + vmovaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]); } } diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx2.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx2.cpp index e1f7cc995e..a42600cb82 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx2.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.avx2.cpp @@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_AVX2() for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) { - vmovaps(Ymm(3 + i), ptr[&m_shift[i]]); + vmovaps(Ymm(3 + i), ptr[g_const->m_shift_256b[i]]); } } @@ -104,7 +104,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() // m_local.d[i].z = dz * shift[1 + i]; if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]); + else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vmovaps(ptr[&m_local.d[i].z], ymm0); } @@ -113,7 +113,7 @@ void GSSetupPrimCodeGenerator::Depth_AVX2() // m_local.d[i].f = GSVector8i(df * m_shift[i]).xxzzlh(); if(i < 4) vmulps(ymm0, ymm1, Ymm(4 + i)); - else vmulps(ymm0, ymm1, ptr[&m_shift[i + 1]]); + else vmulps(ymm0, ymm1, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm0, ymm0); vpshuflw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); vpshufhw(ymm0, ymm0, _MM_SHUFFLE(2, 2, 0, 0)); @@ -190,7 +190,7 @@ void GSSetupPrimCodeGenerator::Texture_AVX2() // GSVector8 v = dstq * shift[1 + i]; if(i < 4) vmulps(ymm2, ymm1, Ymm(4 + i)); - else vmulps(ymm2, ymm1, ptr[&m_shift[i + 1]]); + else vmulps(ymm2, ymm1, ptr[g_const->m_shift_256b[i + 1]]); if(m_sel.fst) { @@ -253,14 +253,14 @@ void GSSetupPrimCodeGenerator::Color_AVX2() // GSVector8i r = GSVector8i(dr * shift[1 + i]).ps32(); if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]); + else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm0, ymm0); vpackssdw(ymm0, ymm0); // GSVector4i b = GSVector8i(db * shift[1 + i]).ps32(); if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); - else vmulps(ymm1, ymm3, ptr[&m_shift[i + 1]]); + else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm1, ymm1); vpackssdw(ymm1, ymm1); @@ -285,14 +285,14 @@ void GSSetupPrimCodeGenerator::Color_AVX2() // GSVector8i g = GSVector8i(dg * shift[1 + i]).ps32(); if(i < 4) vmulps(ymm0, ymm2, Ymm(4 + i)); - else vmulps(ymm0, ymm2, ptr[&m_shift[i + 1]]); + else vmulps(ymm0, ymm2, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm0, ymm0); vpackssdw(ymm0, ymm0); // GSVector8i a = GSVector8i(da * shift[1 + i]).ps32(); if(i < 4) vmulps(ymm1, ymm3, Ymm(4 + i)); - else vmulps(ymm1, ymm3, ptr[&m_shift[i + 1]]); + else vmulps(ymm1, ymm3, ptr[g_const->m_shift_256b[i + 1]]); vcvttps2dq(ymm1, ymm1); vpackssdw(ymm1, ymm1); diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp index 951788fa09..6416ee1051 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.x86.cpp @@ -40,7 +40,7 @@ void GSSetupPrimCodeGenerator::Generate_SSE() for(int i = 0; i < (m_sel.notest ? 2 : 5); i++) { - movaps(Xmm(3 + i), ptr[&m_shift[i]]); + movaps(Xmm(3 + i), ptr[g_const->m_shift_128b[i]]); } }