diff --git a/plugins/GSdx/GS.cpp b/plugins/GSdx/GS.cpp index 752919206e..e6cfea1f9a 100644 --- a/plugins/GSdx/GS.cpp +++ b/plugins/GSdx/GS.cpp @@ -123,6 +123,30 @@ EXPORT_C_(int) GSinit() return -1; } + // Vector instructions must be avoided when initialising GSdx since PCSX2 + // can crash if the CPU does not support the instruction set. + // Initialise it here instead - it's not ideal since we have to strip the + // const type qualifier from all the affected variables. + theApp.Init(); + + GSBlock::InitVectors(); + GSClut::InitVectors(); + GSDrawScanlineCodeGenerator::InitVectors(); +#ifdef ENABLE_OPENCL + GSRendererCL::InitVectors(); +#endif + GSRendererSW::InitVectors(); + GSSetupPrimCodeGenerator::InitVectors(); + GSVector4i::InitVectors(); + GSVector4::InitVectors(); +#if _M_SSE >= 0x500 + GSVector8::InitVectors(); +#endif +#if _M_SSE >= 0x501 + GSVector8i::InitVectors(); +#endif + GSVertexTrace::InitVectors(); + #ifdef _WIN32 s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); @@ -793,6 +817,8 @@ EXPORT_C GSconfigure() { if(!GSUtil::CheckSSE()) return; + theApp.Init(); + #ifdef _WIN32 GSDialog::InitCommonControls(); if(GSSettingsDlg().DoModal() == IDOK) diff --git a/plugins/GSdx/GSBlock.cpp b/plugins/GSdx/GSBlock.cpp index e010f700c7..62dde3289e 100644 --- a/plugins/GSdx/GSBlock.cpp +++ b/plugins/GSdx/GSBlock.cpp @@ -23,26 +23,54 @@ #include "GSBlock.h" #if _M_SSE >= 0x501 -const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); +GSVector8i GSBlock::m_r16mask; #else -const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); +GSVector4i GSBlock::m_r16mask; #endif -const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); -const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); +GSVector4i GSBlock::m_r8mask; +GSVector4i GSBlock::m_r4mask; #if _M_SSE >= 0x501 -const 
GSVector8i GSBlock::m_xxxa(0x00008000); -const GSVector8i GSBlock::m_xxbx(0x00007c00); -const GSVector8i GSBlock::m_xgxx(0x000003e0); -const GSVector8i GSBlock::m_rxxx(0x0000001f); +GSVector8i GSBlock::m_xxxa; +GSVector8i GSBlock::m_xxbx; +GSVector8i GSBlock::m_xgxx; +GSVector8i GSBlock::m_rxxx; #else -const GSVector4i GSBlock::m_xxxa(0x00008000); -const GSVector4i GSBlock::m_xxbx(0x00007c00); -const GSVector4i GSBlock::m_xgxx(0x000003e0); -const GSVector4i GSBlock::m_rxxx(0x0000001f); +GSVector4i GSBlock::m_xxxa; +GSVector4i GSBlock::m_xxbx; +GSVector4i GSBlock::m_xgxx; +GSVector4i GSBlock::m_rxxx; #endif -const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9); -const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11); -const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13); -const GSVector4i GSBlock::m_uw8hmask3(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15); +GSVector4i GSBlock::m_uw8hmask0; +GSVector4i GSBlock::m_uw8hmask1; +GSVector4i GSBlock::m_uw8hmask2; +GSVector4i GSBlock::m_uw8hmask3; + +void GSBlock::InitVectors() +{ +#if _M_SSE >= 0x501 + m_r16mask = GSVector8i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); +#else + m_r16mask = GSVector4i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); +#endif + m_r8mask = GSVector4i(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); + m_r4mask = GSVector4i(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); + +#if _M_SSE >= 0x501 + m_xxxa = GSVector8i(0x00008000); + m_xxbx = GSVector8i(0x00007c00); + m_xgxx = GSVector8i(0x000003e0); + m_rxxx = GSVector8i(0x0000001f); +#else + m_xxxa = GSVector4i(0x00008000); + m_xxbx = GSVector4i(0x00007c00); + m_xgxx = GSVector4i(0x000003e0); + m_rxxx = GSVector4i(0x0000001f); +#endif + + m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9); + m_uw8hmask1 = GSVector4i(2, 2, 2, 
2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11); + m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13); + m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15); +} diff --git a/plugins/GSdx/GSBlock.h b/plugins/GSdx/GSBlock.h index 1c38fcb529..b73a0efc42 100644 --- a/plugins/GSdx/GSBlock.h +++ b/plugins/GSdx/GSBlock.h @@ -28,31 +28,33 @@ class GSBlock { #if _M_SSE >= 0x501 - static const GSVector8i m_r16mask; + static GSVector8i m_r16mask; #else - static const GSVector4i m_r16mask; + static GSVector4i m_r16mask; #endif - static const GSVector4i m_r8mask; - static const GSVector4i m_r4mask; + static GSVector4i m_r8mask; + static GSVector4i m_r4mask; #if _M_SSE >= 0x501 - static const GSVector8i m_xxxa; - static const GSVector8i m_xxbx; - static const GSVector8i m_xgxx; - static const GSVector8i m_rxxx; + static GSVector8i m_xxxa; + static GSVector8i m_xxbx; + static GSVector8i m_xgxx; + static GSVector8i m_rxxx; #else - static const GSVector4i m_xxxa; - static const GSVector4i m_xxbx; - static const GSVector4i m_xgxx; - static const GSVector4i m_rxxx; + static GSVector4i m_xxxa; + static GSVector4i m_xxbx; + static GSVector4i m_xgxx; + static GSVector4i m_rxxx; #endif - static const GSVector4i m_uw8hmask0; - static const GSVector4i m_uw8hmask1; - static const GSVector4i m_uw8hmask2; - static const GSVector4i m_uw8hmask3; + static GSVector4i m_uw8hmask0; + static GSVector4i m_uw8hmask1; + static GSVector4i m_uw8hmask2; + static GSVector4i m_uw8hmask3; public: + static void InitVectors(); + template __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) { const uint8* RESTRICT s0 = &src[srcpitch * 0]; diff --git a/plugins/GSdx/GSClut.cpp b/plugins/GSdx/GSClut.cpp index 261604e4fd..30808bcd47 100644 --- a/plugins/GSdx/GSClut.cpp +++ b/plugins/GSdx/GSClut.cpp @@ -682,17 +682,24 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector // TODO 
-static const GSVector4i s_bm(0x00007c00); -static const GSVector4i s_gm(0x000003e0); -static const GSVector4i s_rm(0x0000001f); +GSVector4i GSClut::m_bm; +GSVector4i GSClut::m_gm; +GSVector4i GSClut::m_rm; + +void GSClut::InitVectors() +{ + m_bm = GSVector4i(0x00007c00); + m_gm = GSVector4i(0x000003e0); + m_rm = GSVector4i(0x0000001f); +} void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA) { ASSERT((w & 7) == 0); - const GSVector4i rm = s_rm; - const GSVector4i gm = s_gm; - const GSVector4i bm = s_bm; + const GSVector4i rm = m_rm; + const GSVector4i gm = m_gm; + const GSVector4i bm = m_bm; GSVector4i TA0(TEXA.TA0 << 24); GSVector4i TA1(TEXA.TA1 << 24); diff --git a/plugins/GSdx/GSClut.h b/plugins/GSdx/GSClut.h index d39eb1602f..a4c6ce750a 100644 --- a/plugins/GSdx/GSClut.h +++ b/plugins/GSdx/GSClut.h @@ -30,6 +30,10 @@ class GSLocalMemory; class alignas(32) GSClut : public GSAlignedClass<32> { + static GSVector4i m_bm; + static GSVector4i m_gm; + static GSVector4i m_rm; + GSLocalMemory* m_mem; uint32 m_CBP[2]; @@ -93,6 +97,8 @@ class alignas(32) GSClut : public GSAlignedClass<32> static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA); public: + static void InitVectors(); + GSClut(GSLocalMemory* mem); virtual ~GSClut(); diff --git a/plugins/GSdx/GSCrc.cpp b/plugins/GSdx/GSCrc.cpp index b6bc934257..92470e8dfe 100644 --- a/plugins/GSdx/GSCrc.cpp +++ b/plugins/GSdx/GSCrc.cpp @@ -516,7 +516,7 @@ CRC::Game CRC::m_games[] = {0x06A7506A, SacredBlaze, JP, 0}, }; -hash_map CRC::m_map; +map CRC::m_map; string ToLower( string str ) { @@ -563,7 +563,7 @@ CRC::Game CRC::Lookup(uint32 crc) printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups); } - hash_map::iterator i = m_map.find(crc); + auto i = m_map.find(crc); if(i != m_map.end()) { diff --git a/plugins/GSdx/GSCrc.h b/plugins/GSdx/GSCrc.h index 6a36da9f92..722c00656f 100644 --- a/plugins/GSdx/GSCrc.h +++ 
b/plugins/GSdx/GSCrc.h @@ -211,7 +211,7 @@ public: private: static Game m_games[]; - static hash_map m_map; + static map m_map; public: static Game Lookup(uint32 crc); diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp index 09a233a6ae..90c941638e 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.cpp @@ -44,37 +44,55 @@ alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, }; -const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] = +GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4]; +#else +GSVector4i GSDrawScanlineCodeGenerator::m_test[8]; +GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4]; +#endif + +void GSDrawScanlineCodeGenerator::InitVectors() { - GSVector8(0.204446009836232697516f), - GSVector8(-1.04913055217340124191f), - GSVector8(2.28330284476918490682f), - GSVector8(1.0f), -}; +#if _M_SSE >= 0x501 + GSVector8 log2_coef[4] = + { + GSVector8(0.204446009836232697516f), + GSVector8(-1.04913055217340124191f), + GSVector8(2.28330284476918490682f), + GSVector8(1.0f), + }; + + for (size_t n = 0; n < countof(log2_coef); ++n) + m_log2_coef[n] = log2_coef[n]; #else + GSVector4i test[8] = + { + GSVector4i::zero(), + GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), + GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), + GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), + GSVector4i::zero(), + }; -const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] = -{ - GSVector4i::zero(), - GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - 
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), - GSVector4i::zero(), -}; + GSVector4 log2_coef[4] = + { + GSVector4(0.204446009836232697516f), + GSVector4(-1.04913055217340124191f), + GSVector4(2.28330284476918490682f), + GSVector4(1.0f), + }; -const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] = -{ - GSVector4(0.204446009836232697516f), - GSVector4(-1.04913055217340124191f), - GSVector4(2.28330284476918490682f), - GSVector4(1.0f), -}; + for (size_t n = 0; n < countof(test); ++n) + m_test[n] = test[n]; + + for (size_t n = 0; n < countof(log2_coef); ++n) + m_log2_coef[n] = log2_coef[n]; #endif +} GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) @@ -354,4 +372,4 @@ void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a) #endif } -#endif \ No newline at end of file +#endif diff --git a/plugins/GSdx/GSDrawScanlineCodeGenerator.h b/plugins/GSdx/GSDrawScanlineCodeGenerator.h index 5ff54b4d4b..c737e2f0d9 100644 --- a/plugins/GSdx/GSDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/GSDrawScanlineCodeGenerator.h @@ -136,10 +136,11 @@ public: #if _M_SSE >= 0x501 alignas(8) static const uint8 m_test[16][8]; - static const GSVector8 m_log2_coef[4]; + static GSVector8 m_log2_coef[4]; #else - static const GSVector4i m_test[8]; - static const GSVector4 m_log2_coef[4]; + static GSVector4i m_test[8]; + static GSVector4 m_log2_coef[4]; #endif + static void InitVectors(); }; diff --git a/plugins/GSdx/GSRendererCL.cpp b/plugins/GSdx/GSRendererCL.cpp index 895f01d2d7..99f5eb5890 100644 --- a/plugins/GSdx/GSRendererCL.cpp +++ b/plugins/GSdx/GSRendererCL.cpp @@ -74,6 +74,13 @@ typedef struct #pragma pack(pop) +GSVector4 GSRendererCL::m_pos_scale; + +void GSRendererCL::InitVectors() +{ + m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 
1.0f, 1.0f); +} + GSRendererCL::GSRendererCL() : m_vb_count(0) , m_synced(true) @@ -200,8 +207,6 @@ GSTexture* GSRendererCL::GetOutput(int i, int& y_offset) return m_texture[i]; } -const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 1.0f); - template void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) { @@ -214,7 +219,7 @@ void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* GSVector4i xyzuvf(src->m[1]); - dst->p = (GSVector4(xyzuvf.upl16() - o) * g_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints + dst->p = (GSVector4(xyzuvf.upl16() - o) * m_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints GSVector4 t = GSVector4::zero(); diff --git a/plugins/GSdx/GSRendererCL.h b/plugins/GSdx/GSRendererCL.h index d1b849b6cd..6f656e75a6 100644 --- a/plugins/GSdx/GSRendererCL.h +++ b/plugins/GSdx/GSRendererCL.h @@ -32,6 +32,8 @@ struct alignas(32) GSVertexCL class GSRendererCL : public GSRenderer { + static GSVector4 m_pos_scale; + typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); ConvertVertexBufferPtr m_cvb[4][2][2]; @@ -261,6 +263,8 @@ protected: bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count); public: + static void InitVectors(); + GSRendererCL(); virtual ~GSRendererCL(); }; diff --git a/plugins/GSdx/GSRendererSW.cpp b/plugins/GSdx/GSRendererSW.cpp index 7fbc1ea16b..09fbb66b3d 100644 --- a/plugins/GSdx/GSRendererSW.cpp +++ b/plugins/GSdx/GSRendererSW.cpp @@ -26,11 +26,19 @@ static FILE* s_fp = LOG ? 
fopen("c:\\temp1\\_.txt", "w") : NULL; -const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); +GSVector4 GSRendererSW::m_pos_scale; +#if _M_SSE >= 0x501 +GSVector8 GSRendererSW::m_pos_scale2; +#endif + +void GSRendererSW::InitVectors() +{ + m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); #if _M_SSE >= 0x501 -const GSVector8 g_pos_scale2(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); + m_pos_scale2 = GSVector8(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); #endif +} GSRendererSW::GSRendererSW(int threads) : m_fzb(NULL) @@ -294,7 +302,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* GSVector8i xy = xyzuvf.upl16() - o2; GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00()); - GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * g_pos_scale2; + GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * m_pos_scale2; GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7); GSVector8 t = GSVector8::zero(); @@ -364,7 +372,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex* #endif - dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; + dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * m_pos_scale; dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7); GSVector4 t = GSVector4::zero(); diff --git a/plugins/GSdx/GSRendererSW.h b/plugins/GSdx/GSRendererSW.h index b2e1c2155b..2689e98011 100644 --- a/plugins/GSdx/GSRendererSW.h +++ b/plugins/GSdx/GSRendererSW.h @@ -27,6 +27,11 @@ class GSRendererSW : public GSRenderer { + static GSVector4 m_pos_scale; +#if _M_SSE >= 0x501 + static GSVector8 m_pos_scale2; +#endif + class SharedData : public GSDrawScanline::SharedData { struct 
alignas(16) TextureLevel @@ -95,6 +100,8 @@ protected: bool GetScanlineGlobalData(SharedData* data); public: + static void InitVectors(); + GSRendererSW(int threads); virtual ~GSRendererSW(); }; diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp index 37e253ee9f..37427898f8 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.cpp @@ -23,32 +23,44 @@ #include "GSSetupPrimCodeGenerator.h" #if _M_SSE >= 0x501 +GSVector8 GSSetupPrimCodeGenerator::m_shift[9]; +#else +GSVector4 GSSetupPrimCodeGenerator::m_shift[5]; +#endif -const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] = +void GSSetupPrimCodeGenerator::InitVectors() { - GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f), - GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f), - GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f), - GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f), - GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f), - GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f), - GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f), - GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f), - GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f), -}; +#if _M_SSE >= 0x501 + GSVector8 shift[9] = + { + GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f), + GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f), + GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f), + GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f), + GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f), + GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f), + GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f), + GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f), + GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f), + }; + + for (size_t n = 0; n < countof(shift); ++n) + m_shift[n] = 
shift[n]; #else + GSVector4 shift[5] = + { + GSVector4(4.0f, 4.0f, 4.0f, 4.0f), + GSVector4(0.0f, 1.0f, 2.0f, 3.0f), + GSVector4(-1.0f, 0.0f, 1.0f, 2.0f), + GSVector4(-2.0f, -1.0f, 0.0f, 1.0f), + GSVector4(-3.0f, -2.0f, -1.0f, 0.0f), + }; -const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] = -{ - GSVector4(4.0f, 4.0f, 4.0f, 4.0f), - GSVector4(0.0f, 1.0f, 2.0f, 3.0f), - GSVector4(-1.0f, 0.0f, 1.0f, 2.0f), - GSVector4(-2.0f, -1.0f, 0.0f, 1.0f), - GSVector4(-3.0f, -2.0f, -1.0f, 0.0f), -}; - + for (size_t n = 0; n < countof(shift); ++n) + m_shift[n] = shift[n]; #endif +} GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) : GSCodeGenerator(code, maxsize) diff --git a/plugins/GSdx/GSSetupPrimCodeGenerator.h b/plugins/GSdx/GSSetupPrimCodeGenerator.h index 746d7996aa..d4b2c1106f 100644 --- a/plugins/GSdx/GSSetupPrimCodeGenerator.h +++ b/plugins/GSdx/GSSetupPrimCodeGenerator.h @@ -43,8 +43,10 @@ public: GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); #if _M_SSE >= 0x501 - static const GSVector8 m_shift[9]; + static GSVector8 m_shift[9]; #else - static const GSVector4 m_shift[5]; + static GSVector4 m_shift[5]; #endif + + static void InitVectors(); }; diff --git a/plugins/GSdx/GSUtil.cpp b/plugins/GSdx/GSUtil.cpp index de309580be..5de503b658 100644 --- a/plugins/GSdx/GSUtil.cpp +++ b/plugins/GSdx/GSUtil.cpp @@ -35,58 +35,60 @@ const char* GSUtil::GetLibName() { - // TODO: critsec + // The following ifdef mess is courtesy of "static string str;" + // being optimised by GCC to be unusable by older CPUs. Enjoy! 
+ static char name[255]; - static string str; + snprintf(name, sizeof(name), "GSdx " - if(str.empty()) - { - str = "GSdx"; +#ifdef _WIN32 + "%lld " +#endif +#ifdef _M_AMD64 + "64-bit " +#endif +#ifdef __INTEL_COMPILER + "(Intel C++ %d.%02d %s)", +#elif _MSC_VER + "(MSVC %d.%02d %s)", +#elif __clang__ + "(clang %d.%d.%d %s)", +#elif __GNUC__ + "(GCC %d.%d.%d %s)", +#else + "(%s)", +#endif +#ifdef _WIN32 + SVN_REV, +#endif +#ifdef __INTEL_COMPILER + __INTEL_COMPILER / 100, __INTEL_COMPILER % 100, +#elif _MSC_VER + _MSC_VER / 100, _MSC_VER % 100, +#elif __clang__ + __clang_major__, __clang_minor__, __clang_patchlevel__, +#elif __GNUC__ + __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__, +#endif - #ifdef _WIN32 - str += format(" %lld", SVN_REV); - if(SVN_MODS) str += "m"; - #endif +#if _M_SSE >= 0x501 + "AVX2" +#elif _M_SSE >= 0x500 + "AVX" +#elif _M_SSE >= 0x402 + "SSE4.2" +#elif _M_SSE >= 0x401 + "SSE4.1" +#elif _M_SSE >= 0x301 + "SSSE3" +#elif _M_SSE >= 0x200 + "SSE2" +#elif _M_SSE >= 0x100 + "SSE" +#endif + ); - #ifdef _M_AMD64 - str += " 64-bit"; - #endif - - list sl; - - #ifdef __INTEL_COMPILER - sl.push_back(format("Intel C++ %d.%02d", __INTEL_COMPILER / 100, __INTEL_COMPILER % 100)); - #elif _MSC_VER - sl.push_back(format("MSVC %d.%02d", _MSC_VER / 100, _MSC_VER % 100)); - #elif __GNUC__ - sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__)); - #endif - - #if _M_SSE >= 0x501 - sl.push_back("AVX2"); - #elif _M_SSE >= 0x500 - sl.push_back("AVX"); - #elif _M_SSE >= 0x402 - sl.push_back("SSE42"); - #elif _M_SSE >= 0x401 - sl.push_back("SSE41"); - #elif _M_SSE >= 0x301 - sl.push_back("SSSE3"); - #elif _M_SSE >= 0x200 - sl.push_back("SSE2"); - #elif _M_SSE >= 0x100 - sl.push_back("SSE"); - #endif - - for(list::iterator i = sl.begin(); i != sl.end(); ) - { - if(i == sl.begin()) str += " ("; - str += *i; - str += ++i != sl.end() ? 
", " : ")"; - } - } - - return str.c_str(); + return name; } static class GSUtilMaps @@ -203,22 +205,31 @@ bool GSUtil::CheckSSE() { Xbyak::util::Cpu cpu; Xbyak::util::Cpu::Type type; + const char* instruction_set = ""; - #if _M_SSE >= 0x500 + #if _M_SSE >= 0x501 + type = Xbyak::util::Cpu::tAVX2; + instruction_set = "AVX2"; + #elif _M_SSE >= 0x500 type = Xbyak::util::Cpu::tAVX; + instruction_set = "AVX"; #elif _M_SSE >= 0x402 type = Xbyak::util::Cpu::tSSE42; + instruction_set = "SSE4.2"; #elif _M_SSE >= 0x401 type = Xbyak::util::Cpu::tSSE41; + instruction_set = "SSE4.1"; #elif _M_SSE >= 0x301 type = Xbyak::util::Cpu::tSSSE3; + instruction_set = "SSSE3"; #elif _M_SSE >= 0x200 type = Xbyak::util::Cpu::tSSE2; + instruction_set = "SSE2"; #endif if(!cpu.has(type)) { - fprintf(stderr, "This CPU does not support SSE %d.%02d", _M_SSE >> 8, _M_SSE & 0xff); + fprintf(stderr, "This CPU does not support %s\n", instruction_set); return false; } diff --git a/plugins/GSdx/GSVector.cpp b/plugins/GSdx/GSVector.cpp index d5d074ac3a..4f307a0187 100644 --- a/plugins/GSdx/GSVector.cpp +++ b/plugins/GSdx/GSVector.cpp @@ -22,148 +22,196 @@ #include "stdafx.h" #include "GSVector.h" -const GSVector4i GSVector4i::m_xff[17] = -{ - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), - GSVector4i(0xffffffff, 
0xffffffff, 0xffffffff, 0x00000000), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), - GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), -}; +GSVector4i GSVector4i::m_xff[17]; +GSVector4i GSVector4i::m_x0f[17]; -const GSVector4i GSVector4i::m_x0f[17] = +void GSVector4i::InitVectors() { - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), - GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), -}; + GSVector4i xff[17] = + { + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0x00ffffff, 
0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), + GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), + }; -const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f); -const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f); -const GSVector4 GSVector4::m_half(0.5f); -const GSVector4 GSVector4::m_one(1.0f); -const GSVector4 GSVector4::m_two(2.0f); -const GSVector4 GSVector4::m_four(4.0f); -const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000))); -const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000))); -const GSVector4 GSVector4::m_max(FLT_MAX); -const GSVector4 GSVector4::m_min(FLT_MIN); + GSVector4i x0f[17] = + { + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), + GSVector4i(0x0f0f0f0f, 
0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), + GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), + }; + + for (size_t n = 0; n < countof(xff); ++n) + m_xff[n] = xff[n]; + + for (size_t n = 0; n < countof(x0f); ++n) + m_x0f[n] = x0f[n]; +} + +GSVector4 GSVector4::m_ps0123; +GSVector4 GSVector4::m_ps4567; +GSVector4 GSVector4::m_half; +GSVector4 GSVector4::m_one; +GSVector4 GSVector4::m_two; +GSVector4 GSVector4::m_four; +GSVector4 GSVector4::m_x4b000000; +GSVector4 GSVector4::m_x4f800000; +GSVector4 GSVector4::m_max; +GSVector4 GSVector4::m_min; + +void GSVector4::InitVectors() +{ + m_ps0123 = GSVector4(0.0f, 1.0f, 2.0f, 3.0f); + m_ps4567 = GSVector4(4.0f, 5.0f, 6.0f, 7.0f); + m_half = GSVector4(0.5f); + m_one = GSVector4(1.0f); + m_two = GSVector4(2.0f); + m_four = GSVector4(4.0f); + m_x4b000000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000))); + m_x4f800000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000))); + m_max = GSVector4(FLT_MAX); + m_min = GSVector4(FLT_MIN); +} #if _M_SSE >= 0x500 -const GSVector8 GSVector8::m_half(0.5f); -const GSVector8 GSVector8::m_one(1.0f); -const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); -const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); -const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000))); -const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000))); -const GSVector8 GSVector8::m_max(FLT_MAX); -const GSVector8 GSVector8::m_min(FLT_MIN); +GSVector8 GSVector8::m_half; +GSVector8 GSVector8::m_one; +GSVector8 GSVector8::m_x7fffffff; +GSVector8 GSVector8::m_x80000000; +GSVector8 GSVector8::m_x4b000000; +GSVector8 GSVector8::m_x4f800000; +GSVector8 GSVector8::m_max; +GSVector8 GSVector8::m_min; + +void GSVector8::InitVectors() +{ + m_half = 
GSVector8(0.5f); + m_one = GSVector8(1.0f); + m_x7fffffff = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); + m_x80000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); + m_x4b000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000))); + m_x4f800000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000))); + m_max = GSVector8(FLT_MAX); + m_min = GSVector8(FLT_MIN); +} #endif #if _M_SSE >= 0x501 +GSVector8i GSVector8i::m_xff[33]; +GSVector8i GSVector8i::m_x0f[33]; -const GSVector8i GSVector8i::m_xff[33] = +void GSVector8i::InitVectors() { - GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 
0xffffffff, 0x0000ffff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), - GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), -}; + GSVector8i xff[33] = + { + GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 
0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), + GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), + }; -const GSVector8i GSVector8i::m_x0f[33] = -{ - GSVector8i(0x00000000, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 
0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), - GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), -}; + GSVector8i x0f[33] = + { + GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x000f0f0f, 0x00000000, 
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 
0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), + GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), + }; + for (size_t n = 0; n < countof(xff); ++n) + m_xff[n] = xff[n]; + + for (size_t n = 0; n < countof(x0f); ++n) + m_x0f[n] = x0f[n]; +} #endif GSVector4i GSVector4i::fit(int arx, int ary) const diff --git a/plugins/GSdx/GSVector.h b/plugins/GSdx/GSVector.h index da46a64f71..3dfd6ed70f 100644 --- a/plugins/GSdx/GSVector.h +++ b/plugins/GSdx/GSVector.h @@ -92,8 +92,8 @@ class GSVector8i; class alignas(16) GSVector4i { - static const GSVector4i m_xff[17]; - static const GSVector4i m_x0f[17]; + static GSVector4i m_xff[17]; + static GSVector4i m_x0f[17]; public: union @@ -114,6 +114,8 @@ public: __m128i m; }; + static void InitVectors(); + __forceinline GSVector4i() { } @@ -2442,16 +2444,18 @@ public: __m128 m; }; - 
static const GSVector4 m_ps0123; - static const GSVector4 m_ps4567; - static const GSVector4 m_half; - static const GSVector4 m_one; - static const GSVector4 m_two; - static const GSVector4 m_four; - static const GSVector4 m_x4b000000; - static const GSVector4 m_x4f800000; - static const GSVector4 m_max; - static const GSVector4 m_min; + static GSVector4 m_ps0123; + static GSVector4 m_ps4567; + static GSVector4 m_half; + static GSVector4 m_one; + static GSVector4 m_two; + static GSVector4 m_four; + static GSVector4 m_x4b000000; + static GSVector4 m_x4f800000; + static GSVector4 m_max; + static GSVector4 m_min; + + static void InitVectors(); __forceinline GSVector4() { @@ -3343,8 +3347,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i' class alignas(32) GSVector8i { - static const GSVector8i m_xff[33]; - static const GSVector8i m_x0f[33]; + static GSVector8i m_xff[33]; + static GSVector8i m_x0f[33]; public: union @@ -3365,6 +3369,8 @@ public: __m128i m0, m1; }; + static void InitVectors(); + __forceinline GSVector8i() {} __forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true); @@ -5154,14 +5160,16 @@ public: __m128 m0, m1; }; - static const GSVector8 m_half; - static const GSVector8 m_one; - static const GSVector8 m_x7fffffff; - static const GSVector8 m_x80000000; - static const GSVector8 m_x4b000000; - static const GSVector8 m_x4f800000; - static const GSVector8 m_max; - static const GSVector8 m_min; + static GSVector8 m_half; + static GSVector8 m_one; + static GSVector8 m_x7fffffff; + static GSVector8 m_x80000000; + static GSVector8 m_x4b000000; + static GSVector8 m_x4f800000; + static GSVector8 m_max; + static GSVector8 m_min; + + static void InitVectors(); __forceinline GSVector8() { diff --git a/plugins/GSdx/GSVertexTrace.cpp b/plugins/GSdx/GSVertexTrace.cpp index 9ef65d86bb..395330e556 100644 --- a/plugins/GSdx/GSVertexTrace.cpp +++ b/plugins/GSdx/GSVertexTrace.cpp @@ -24,7 +24,12 @@ #include "GSUtil.h" #include "GSState.h" 
-const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX); +GSVector4 GSVertexTrace::s_minmax; + +void GSVertexTrace::InitVectors() +{ + s_minmax = GSVector4(FLT_MAX, -FLT_MAX); +} GSVertexTrace::GSVertexTrace(const GSState* state) : m_state(state) diff --git a/plugins/GSdx/GSVertexTrace.h b/plugins/GSdx/GSVertexTrace.h index caaa75a18e..f3329d57ba 100644 --- a/plugins/GSdx/GSVertexTrace.h +++ b/plugins/GSdx/GSVertexTrace.h @@ -38,7 +38,7 @@ public: protected: const GSState* m_state; - static const GSVector4 s_minmax; + static GSVector4 s_minmax; typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count); @@ -69,6 +69,8 @@ public: GSVector2 m_lod; // x = min, y = max public: + static void InitVectors(); + GSVertexTrace(const GSState* state); virtual ~GSVertexTrace() {} diff --git a/plugins/GSdx/GSdx.cpp b/plugins/GSdx/GSdx.cpp index 878de1ff33..36ca03079e 100644 --- a/plugins/GSdx/GSdx.cpp +++ b/plugins/GSdx/GSdx.cpp @@ -127,6 +127,21 @@ GSdxApp theApp; GSdxApp::GSdxApp() { + // Empty constructor causes an illegal instruction exception on an SSE4.2 machine on Windows. + // Non-empty doesn't, but raises a SIGILL signal when compiled against GCC 6.1.1. + // So here's a compromise. 
+#ifdef _WIN32 + Init(); +#endif +} + +void GSdxApp::Init() +{ + static bool is_initialised = false; + if (is_initialised) + return; + is_initialised = true; + m_ini = "inis/GSdx.ini"; m_section = "Settings"; diff --git a/plugins/GSdx/GSdx.h b/plugins/GSdx/GSdx.h index 31a5cc31c1..91f900e208 100644 --- a/plugins/GSdx/GSdx.h +++ b/plugins/GSdx/GSdx.h @@ -35,7 +35,8 @@ class GSdxApp public: GSdxApp(); - void* GetModuleHandlePtr(); + void Init(); + void* GetModuleHandlePtr(); #ifdef _WIN32 HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();} diff --git a/plugins/GSdx/PSX/GPU.cpp b/plugins/GSdx/PSX/GPU.cpp index 6c26a94d82..703075c70f 100644 --- a/plugins/GSdx/PSX/GPU.cpp +++ b/plugins/GSdx/PSX/GPU.cpp @@ -57,8 +57,33 @@ EXPORT_C_(uint32) PSEgetLibVersion() return version << 16 | revision << 8 | PLUGIN_VERSION; } +static void InitVectors() +{ + GSVector4i::InitVectors(); + GSVector4::InitVectors(); +#if _M_SSE >= 0x500 + GSVector8::InitVectors(); +#endif +#if _M_SSE >= 0x501 + GSVector8i::InitVectors(); +#endif + + GPUDrawScanlineCodeGenerator::InitVectors(); + GPULocalMemory::InitVectors(); + GPUSetupPrimCodeGenerator::InitVectors(); +} + EXPORT_C_(int32) GPUinit() { + if(!GSUtil::CheckSSE()) + { + return -1; + } + + theApp.Init(); + + InitVectors(); + return 0; } @@ -140,6 +165,13 @@ EXPORT_C_(int32) GPUopen(void* hWnd) EXPORT_C_(int32) GPUconfigure() { + if(!GSUtil::CheckSSE()) + { + return -1; + } + + theApp.Init(); + #ifdef _WIN32 GPUSettingsDlg dlg; @@ -160,6 +192,11 @@ EXPORT_C_(int32) GPUconfigure() EXPORT_C_(int32) GPUtest() { + if(!GSUtil::CheckSSE()) + { + return -1; + } + return 0; } diff --git a/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.cpp b/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.cpp index 2ada9f269f..8f0f1508a2 100644 --- a/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.cpp +++ b/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.cpp @@ -1010,18 +1010,8 @@ void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& 
movdqa(a, b); } -const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] = -{ - GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), - GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000), - GSVector4i::zero(), -}; +GSVector4i GPUDrawScanlineCodeGenerator::m_test[8]; alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] = { {7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1}, @@ -1029,3 +1019,21 @@ alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] = {1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7}, {4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2}, }; + +void GPUDrawScanlineCodeGenerator::InitVectors() +{ + GSVector4i test[8] = + { + GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff), + GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), + GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff), + GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff), + GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff), + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff), + GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000), + GSVector4i::zero(), + }; + + for (size_t n = 0; n < countof(test); ++n) + m_test[n] = test[n]; +} diff --git a/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.h b/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.h index 9caf2c171c..d522c8146a 100644 --- a/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.h +++ b/plugins/GSdx/PSX/GPUDrawScanlineCodeGenerator.h @@ -55,6 +55,8 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator public: GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); - static const GSVector4i m_test[8]; + 
static GSVector4i m_test[8]; alignas(32) static const uint16 m_dither[4][16]; + + static void InitVectors(); }; diff --git a/plugins/GSdx/PSX/GPULocalMemory.cpp b/plugins/GSdx/PSX/GPULocalMemory.cpp index d7b3e513aa..4ce7779034 100644 --- a/plugins/GSdx/PSX/GPULocalMemory.cpp +++ b/plugins/GSdx/PSX/GPULocalMemory.cpp @@ -23,15 +23,23 @@ #include "GPULocalMemory.h" #include "GSdx.h" -const GSVector4i GPULocalMemory::m_xxxa(0x00008000); -const GSVector4i GPULocalMemory::m_xxbx(0x00007c00); -const GSVector4i GPULocalMemory::m_xgxx(0x000003e0); -const GSVector4i GPULocalMemory::m_rxxx(0x0000001f); +GSVector4i GPULocalMemory::m_xxxa; +GSVector4i GPULocalMemory::m_xxbx; +GSVector4i GPULocalMemory::m_xgxx; +GSVector4i GPULocalMemory::m_rxxx; #define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16)) #define VM_ALLOC_SIZE (VM_REAL_SIZE * 2) #define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32) +void GPULocalMemory::InitVectors() +{ + m_xxxa = GSVector4i(0x00008000); + m_xxbx = GSVector4i(0x00007c00); + m_xgxx = GSVector4i(0x000003e0); + m_rxxx = GSVector4i(0x0000001f); +} + GPULocalMemory::GPULocalMemory() { m_scale.x = std::min(std::max(theApp.GetConfigI("scale_x"), 0), 2); diff --git a/plugins/GSdx/PSX/GPULocalMemory.h b/plugins/GSdx/PSX/GPULocalMemory.h index 4cd94d8a82..3f5bf0b673 100644 --- a/plugins/GSdx/PSX/GPULocalMemory.h +++ b/plugins/GSdx/PSX/GPULocalMemory.h @@ -26,10 +26,10 @@ class GPULocalMemory { - static const GSVector4i m_xxxa; - static const GSVector4i m_xxbx; - static const GSVector4i m_xgxx; - static const GSVector4i m_rxxx; + static GSVector4i m_xxxa; + static GSVector4i m_xxbx; + static GSVector4i m_xgxx; + static GSVector4i m_rxxx; uint16* m_vm; @@ -50,6 +50,8 @@ class GPULocalMemory GSVector2i m_scale; public: + static void InitVectors(); + GPULocalMemory(); virtual ~GPULocalMemory(); diff --git a/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.cpp b/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.cpp index 5367fc3a0b..9539205ecf 100644 --- 
a/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.cpp +++ b/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.cpp @@ -220,9 +220,17 @@ void GPUSetupPrimCodeGenerator::Generate() ret(); } -const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] = +GSVector4 GPUSetupPrimCodeGenerator::m_shift[3]; + +void GPUSetupPrimCodeGenerator::InitVectors() { - GSVector4(8.0f, 8.0f, 8.0f, 8.0f), - GSVector4(0.0f, 1.0f, 2.0f, 3.0f), - GSVector4(4.0f, 5.0f, 6.0f, 7.0f), -}; + GSVector4 shift[3] = + { + GSVector4(8.0f, 8.0f, 8.0f, 8.0f), + GSVector4(0.0f, 1.0f, 2.0f, 3.0f), + GSVector4(4.0f, 5.0f, 6.0f, 7.0f), + }; + + for (size_t n = 0; n < countof(shift); ++n) + m_shift[n] = shift[n]; +} diff --git a/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.h b/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.h index 938c8b4736..056c833712 100644 --- a/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.h +++ b/plugins/GSdx/PSX/GPUSetupPrimCodeGenerator.h @@ -36,5 +36,7 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator public: GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); - static const GSVector4 m_shift[3]; -}; \ No newline at end of file + static GSVector4 m_shift[3]; + + static void InitVectors(); +};