Merge pull request #1477 from turtleli/gsdx-defer-init

gsdx: Avoid illegal instruction crash on older CPUs
This commit is contained in:
Jonathan Li 2016-08-02 23:00:19 +01:00 committed by GitHub
commit f978f9a07d
29 changed files with 611 additions and 328 deletions

View File

@ -123,6 +123,30 @@ EXPORT_C_(int) GSinit()
return -1; return -1;
} }
// Vector instructions must be avoided when initialising GSdx since PCSX2
// can crash if the CPU does not support the instruction set.
// Initialise it here instead - it's not ideal since we have to strip the
// const type qualifier from all the affected variables.
theApp.Init();
GSBlock::InitVectors();
GSClut::InitVectors();
GSDrawScanlineCodeGenerator::InitVectors();
#ifdef ENABLE_OPENCL
GSRendererCL::InitVectors();
#endif
GSRendererSW::InitVectors();
GSSetupPrimCodeGenerator::InitVectors();
GSVector4i::InitVectors();
GSVector4::InitVectors();
#if _M_SSE >= 0x500
GSVector8::InitVectors();
#endif
#if _M_SSE >= 0x501
GSVector8i::InitVectors();
#endif
GSVertexTrace::InitVectors();
#ifdef _WIN32 #ifdef _WIN32
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED); s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
@ -793,6 +817,8 @@ EXPORT_C GSconfigure()
{ {
if(!GSUtil::CheckSSE()) return; if(!GSUtil::CheckSSE()) return;
theApp.Init();
#ifdef _WIN32 #ifdef _WIN32
GSDialog::InitCommonControls(); GSDialog::InitCommonControls();
if(GSSettingsDlg().DoModal() == IDOK) if(GSSettingsDlg().DoModal() == IDOK)

View File

@ -23,26 +23,54 @@
#include "GSBlock.h" #include "GSBlock.h"
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); GSVector8i GSBlock::m_r16mask;
#else #else
const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15); GSVector4i GSBlock::m_r16mask;
#endif #endif
const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15); GSVector4i GSBlock::m_r8mask;
const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15); GSVector4i GSBlock::m_r4mask;
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
const GSVector8i GSBlock::m_xxxa(0x00008000); GSVector8i GSBlock::m_xxxa;
const GSVector8i GSBlock::m_xxbx(0x00007c00); GSVector8i GSBlock::m_xxbx;
const GSVector8i GSBlock::m_xgxx(0x000003e0); GSVector8i GSBlock::m_xgxx;
const GSVector8i GSBlock::m_rxxx(0x0000001f); GSVector8i GSBlock::m_rxxx;
#else #else
const GSVector4i GSBlock::m_xxxa(0x00008000); GSVector4i GSBlock::m_xxxa;
const GSVector4i GSBlock::m_xxbx(0x00007c00); GSVector4i GSBlock::m_xxbx;
const GSVector4i GSBlock::m_xgxx(0x000003e0); GSVector4i GSBlock::m_xgxx;
const GSVector4i GSBlock::m_rxxx(0x0000001f); GSVector4i GSBlock::m_rxxx;
#endif #endif
const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9); GSVector4i GSBlock::m_uw8hmask0;
const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11); GSVector4i GSBlock::m_uw8hmask1;
const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13); GSVector4i GSBlock::m_uw8hmask2;
const GSVector4i GSBlock::m_uw8hmask3(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15); GSVector4i GSBlock::m_uw8hmask3;
void GSBlock::InitVectors()
{
#if _M_SSE >= 0x501
m_r16mask = GSVector8i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
#else
m_r16mask = GSVector4i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
#endif
m_r8mask = GSVector4i(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
m_r4mask = GSVector4i(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
#if _M_SSE >= 0x501
m_xxxa = GSVector8i(0x00008000);
m_xxbx = GSVector8i(0x00007c00);
m_xgxx = GSVector8i(0x000003e0);
m_rxxx = GSVector8i(0x0000001f);
#else
m_xxxa = GSVector4i(0x00008000);
m_xxbx = GSVector4i(0x00007c00);
m_xgxx = GSVector4i(0x000003e0);
m_rxxx = GSVector4i(0x0000001f);
#endif
m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
}

View File

@ -28,31 +28,33 @@
class GSBlock class GSBlock
{ {
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
static const GSVector8i m_r16mask; static GSVector8i m_r16mask;
#else #else
static const GSVector4i m_r16mask; static GSVector4i m_r16mask;
#endif #endif
static const GSVector4i m_r8mask; static GSVector4i m_r8mask;
static const GSVector4i m_r4mask; static GSVector4i m_r4mask;
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
static const GSVector8i m_xxxa; static GSVector8i m_xxxa;
static const GSVector8i m_xxbx; static GSVector8i m_xxbx;
static const GSVector8i m_xgxx; static GSVector8i m_xgxx;
static const GSVector8i m_rxxx; static GSVector8i m_rxxx;
#else #else
static const GSVector4i m_xxxa; static GSVector4i m_xxxa;
static const GSVector4i m_xxbx; static GSVector4i m_xxbx;
static const GSVector4i m_xgxx; static GSVector4i m_xgxx;
static const GSVector4i m_rxxx; static GSVector4i m_rxxx;
#endif #endif
static const GSVector4i m_uw8hmask0; static GSVector4i m_uw8hmask0;
static const GSVector4i m_uw8hmask1; static GSVector4i m_uw8hmask1;
static const GSVector4i m_uw8hmask2; static GSVector4i m_uw8hmask2;
static const GSVector4i m_uw8hmask3; static GSVector4i m_uw8hmask3;
public: public:
static void InitVectors();
template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch) template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{ {
const uint8* RESTRICT s0 = &src[srcpitch * 0]; const uint8* RESTRICT s0 = &src[srcpitch * 0];

View File

@ -682,17 +682,24 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
// TODO // TODO
static const GSVector4i s_bm(0x00007c00); GSVector4i GSClut::m_bm;
static const GSVector4i s_gm(0x000003e0); GSVector4i GSClut::m_gm;
static const GSVector4i s_rm(0x0000001f); GSVector4i GSClut::m_rm;
// Assigns the three static single-value masks that Expand16 copies into its
// local rm/gm/bm constants. Done at run time, on request, rather than through
// static constructors.
void GSClut::InitVectors()
{
	m_rm = GSVector4i(0x0000001f); // bits 0-4
	m_gm = GSVector4i(0x000003e0); // bits 5-9
	m_bm = GSVector4i(0x00007c00); // bits 10-14
}
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA) void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
{ {
ASSERT((w & 7) == 0); ASSERT((w & 7) == 0);
const GSVector4i rm = s_rm; const GSVector4i rm = m_rm;
const GSVector4i gm = s_gm; const GSVector4i gm = m_gm;
const GSVector4i bm = s_bm; const GSVector4i bm = m_bm;
GSVector4i TA0(TEXA.TA0 << 24); GSVector4i TA0(TEXA.TA0 << 24);
GSVector4i TA1(TEXA.TA1 << 24); GSVector4i TA1(TEXA.TA1 << 24);

View File

@ -30,6 +30,10 @@ class GSLocalMemory;
class alignas(32) GSClut : public GSAlignedClass<32> class alignas(32) GSClut : public GSAlignedClass<32>
{ {
static GSVector4i m_bm;
static GSVector4i m_gm;
static GSVector4i m_rm;
GSLocalMemory* m_mem; GSLocalMemory* m_mem;
uint32 m_CBP[2]; uint32 m_CBP[2];
@ -93,6 +97,8 @@ class alignas(32) GSClut : public GSAlignedClass<32>
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA); static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
public: public:
static void InitVectors();
GSClut(GSLocalMemory* mem); GSClut(GSLocalMemory* mem);
virtual ~GSClut(); virtual ~GSClut();

View File

@ -516,7 +516,7 @@ CRC::Game CRC::m_games[] =
{0x06A7506A, SacredBlaze, JP, 0}, {0x06A7506A, SacredBlaze, JP, 0},
}; };
hash_map<uint32, CRC::Game*> CRC::m_map; map<uint32, CRC::Game*> CRC::m_map;
string ToLower( string str ) string ToLower( string str )
{ {
@ -563,7 +563,7 @@ CRC::Game CRC::Lookup(uint32 crc)
printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups); printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups);
} }
hash_map<uint32, Game*>::iterator i = m_map.find(crc); auto i = m_map.find(crc);
if(i != m_map.end()) if(i != m_map.end())
{ {

View File

@ -211,7 +211,7 @@ public:
private: private:
static Game m_games[]; static Game m_games[];
static hash_map<uint32, Game*> m_map; static map<uint32, Game*> m_map;
public: public:
static Game Lookup(uint32 crc); static Game Lookup(uint32 crc);

View File

@ -44,37 +44,55 @@ alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] =
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, {0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
}; };
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] = GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4];
#else
GSVector4i GSDrawScanlineCodeGenerator::m_test[8];
GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4];
#endif
void GSDrawScanlineCodeGenerator::InitVectors()
{ {
GSVector8(0.204446009836232697516f), #if _M_SSE >= 0x501
GSVector8(-1.04913055217340124191f), GSVector8 log2_coef[4] =
GSVector8(2.28330284476918490682f), {
GSVector8(1.0f), GSVector8(0.204446009836232697516f),
}; GSVector8(-1.04913055217340124191f),
GSVector8(2.28330284476918490682f),
GSVector8(1.0f),
};
for (size_t n = 0; n < countof(log2_coef); ++n)
m_log2_coef[n] = log2_coef[n];
#else #else
GSVector4i test[8] =
{
GSVector4i::zero(),
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
GSVector4i::zero(),
};
const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] = GSVector4 log2_coef[4] =
{ {
GSVector4i::zero(), GSVector4(0.204446009836232697516f),
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000), GSVector4(-1.04913055217340124191f),
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000), GSVector4(2.28330284476918490682f),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), GSVector4(1.0f),
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff), };
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
GSVector4i::zero(),
};
const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] = for (size_t n = 0; n < countof(test); ++n)
{ m_test[n] = test[n];
GSVector4(0.204446009836232697516f),
GSVector4(-1.04913055217340124191f), for (size_t n = 0; n < countof(log2_coef); ++n)
GSVector4(2.28330284476918490682f), m_log2_coef[n] = log2_coef[n];
GSVector4(1.0f),
};
#endif #endif
}
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)
@ -354,4 +372,4 @@ void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
#endif #endif
} }
#endif #endif

View File

@ -136,10 +136,11 @@ public:
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
alignas(8) static const uint8 m_test[16][8]; alignas(8) static const uint8 m_test[16][8];
static const GSVector8 m_log2_coef[4]; static GSVector8 m_log2_coef[4];
#else #else
static const GSVector4i m_test[8]; static GSVector4i m_test[8];
static const GSVector4 m_log2_coef[4]; static GSVector4 m_log2_coef[4];
#endif #endif
static void InitVectors();
}; };

View File

@ -74,6 +74,13 @@ typedef struct
#pragma pack(pop) #pragma pack(pop)
// Out-of-class definition of the static member declared in GSRendererCL.
// The `static` keyword belongs only on the in-class declaration; repeating it
// here is ill-formed C++ and breaks the build as soon as ENABLE_OPENCL is
// defined. The value is left default-constructed and filled in by InitVectors().
GSVector4 GSRendererCL::m_pos_scale;

// Assigns the scale vector that ConvertVertexBuffer multiplies into each
// converted vertex position (x and y by 1/16, the remaining lanes by 1).
// Done at run time, on request, rather than through a static constructor.
void GSRendererCL::InitVectors()
{
	m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
}
GSRendererCL::GSRendererCL() GSRendererCL::GSRendererCL()
: m_vb_count(0) : m_vb_count(0)
, m_synced(true) , m_synced(true)
@ -200,8 +207,6 @@ GSTexture* GSRendererCL::GetOutput(int i, int& y_offset)
return m_texture[i]; return m_texture[i];
} }
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
template<uint32 primclass, uint32 tme, uint32 fst> template<uint32 primclass, uint32 tme, uint32 fst>
void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count) void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
{ {
@ -214,7 +219,7 @@ void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex*
GSVector4i xyzuvf(src->m[1]); GSVector4i xyzuvf(src->m[1]);
dst->p = (GSVector4(xyzuvf.upl16() - o) * g_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints dst->p = (GSVector4(xyzuvf.upl16() - o) * m_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
GSVector4 t = GSVector4::zero(); GSVector4 t = GSVector4::zero();

View File

@ -32,6 +32,8 @@ struct alignas(32) GSVertexCL
class GSRendererCL : public GSRenderer class GSRendererCL : public GSRenderer
{ {
static GSVector4 m_pos_scale;
typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count); typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
ConvertVertexBufferPtr m_cvb[4][2][2]; ConvertVertexBufferPtr m_cvb[4][2][2];
@ -261,6 +263,8 @@ protected:
bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count); bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);
public: public:
static void InitVectors();
GSRendererCL(); GSRendererCL();
virtual ~GSRendererCL(); virtual ~GSRendererCL();
}; };

View File

@ -26,11 +26,19 @@
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL; static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f); GSVector4 GSRendererSW::m_pos_scale;
#if _M_SSE >= 0x501
GSVector8 GSRendererSW::m_pos_scale2;
#endif
void GSRendererSW::InitVectors()
{
m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
const GSVector8 g_pos_scale2(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f); m_pos_scale2 = GSVector8(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#endif #endif
}
GSRendererSW::GSRendererSW(int threads) GSRendererSW::GSRendererSW(int threads)
: m_fzb(NULL) : m_fzb(NULL)
@ -294,7 +302,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
GSVector8i xy = xyzuvf.upl16() - o2; GSVector8i xy = xyzuvf.upl16() - o2;
GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00()); GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * g_pos_scale2; GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * m_pos_scale2;
GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7); GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7);
GSVector8 t = GSVector8::zero(); GSVector8 t = GSVector8::zero();
@ -364,7 +372,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
#endif #endif
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale; dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * m_pos_scale;
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7); dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
GSVector4 t = GSVector4::zero(); GSVector4 t = GSVector4::zero();

View File

@ -27,6 +27,11 @@
class GSRendererSW : public GSRenderer class GSRendererSW : public GSRenderer
{ {
static GSVector4 m_pos_scale;
#if _M_SSE >= 0x501
static GSVector8 m_pos_scale2;
#endif
class SharedData : public GSDrawScanline::SharedData class SharedData : public GSDrawScanline::SharedData
{ {
struct alignas(16) TextureLevel struct alignas(16) TextureLevel
@ -95,6 +100,8 @@ protected:
bool GetScanlineGlobalData(SharedData* data); bool GetScanlineGlobalData(SharedData* data);
public: public:
static void InitVectors();
GSRendererSW(int threads); GSRendererSW(int threads);
virtual ~GSRendererSW(); virtual ~GSRendererSW();
}; };

View File

@ -23,32 +23,44 @@
#include "GSSetupPrimCodeGenerator.h" #include "GSSetupPrimCodeGenerator.h"
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
GSVector8 GSSetupPrimCodeGenerator::m_shift[9];
#else
GSVector4 GSSetupPrimCodeGenerator::m_shift[5];
#endif
const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] = void GSSetupPrimCodeGenerator::InitVectors()
{ {
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f), #if _M_SSE >= 0x501
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f), GSVector8 shift[9] =
GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f), {
GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f), GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f), GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f), GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f),
GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f), GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f),
GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f), GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f),
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f), GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f),
}; GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f),
GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f),
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
};
for (size_t n = 0; n < countof(shift); ++n)
m_shift[n] = shift[n];
#else #else
GSVector4 shift[5] =
{
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
};
const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] = for (size_t n = 0; n < countof(shift); ++n)
{ m_shift[n] = shift[n];
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
};
#endif #endif
}
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize) GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize) : GSCodeGenerator(code, maxsize)

View File

@ -43,8 +43,10 @@ public:
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
static const GSVector8 m_shift[9]; static GSVector8 m_shift[9];
#else #else
static const GSVector4 m_shift[5]; static GSVector4 m_shift[5];
#endif #endif
static void InitVectors();
}; };

View File

@ -35,58 +35,60 @@
const char* GSUtil::GetLibName() const char* GSUtil::GetLibName()
{ {
// TODO: critsec // The following ifdef mess is courtesy of "static string str;"
// being optimised by GCC to be unusable by older CPUs. Enjoy!
static char name[255];
static string str; snprintf(name, sizeof(name), "GSdx "
if(str.empty()) #ifdef _WIN32
{ "%lld "
str = "GSdx"; #endif
#ifdef _M_AMD64
"64-bit "
#endif
#ifdef __INTEL_COMPILER
"(Intel C++ %d.%02d %s)",
#elif _MSC_VER
"(MSVC %d.%02d %s)",
#elif __clang__
"(clang %d.%d.%d %s)",
#elif __GNUC__
"(GCC %d.%d.%d %s)",
#else
"(%s)",
#endif
#ifdef _WIN32
SVN_REV,
#endif
#ifdef __INTEL_COMPILER
__INTEL_COMPILER / 100, __INTEL_COMPILER % 100,
#elif _MSC_VER
_MSC_VER / 100, _MSC_VER % 100,
#elif __clang__
__clang_major__, __clang_minor__, __clang_patchlevel__,
#elif __GNUC__
__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
#endif
#ifdef _WIN32 #if _M_SSE >= 0x501
str += format(" %lld", SVN_REV); "AVX2"
if(SVN_MODS) str += "m"; #elif _M_SSE >= 0x500
#endif "AVX"
#elif _M_SSE >= 0x402
"SSE4.2"
#elif _M_SSE >= 0x401
"SSE4.1"
#elif _M_SSE >= 0x301
"SSSE3"
#elif _M_SSE >= 0x200
"SSE2"
#elif _M_SSE >= 0x100
"SSE"
#endif
);
#ifdef _M_AMD64 return name;
str += " 64-bit";
#endif
list<string> sl;
#ifdef __INTEL_COMPILER
sl.push_back(format("Intel C++ %d.%02d", __INTEL_COMPILER / 100, __INTEL_COMPILER % 100));
#elif _MSC_VER
sl.push_back(format("MSVC %d.%02d", _MSC_VER / 100, _MSC_VER % 100));
#elif __GNUC__
sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__));
#endif
#if _M_SSE >= 0x501
sl.push_back("AVX2");
#elif _M_SSE >= 0x500
sl.push_back("AVX");
#elif _M_SSE >= 0x402
sl.push_back("SSE42");
#elif _M_SSE >= 0x401
sl.push_back("SSE41");
#elif _M_SSE >= 0x301
sl.push_back("SSSE3");
#elif _M_SSE >= 0x200
sl.push_back("SSE2");
#elif _M_SSE >= 0x100
sl.push_back("SSE");
#endif
for(list<string>::iterator i = sl.begin(); i != sl.end(); )
{
if(i == sl.begin()) str += " (";
str += *i;
str += ++i != sl.end() ? ", " : ")";
}
}
return str.c_str();
} }
static class GSUtilMaps static class GSUtilMaps
@ -203,22 +205,31 @@ bool GSUtil::CheckSSE()
{ {
Xbyak::util::Cpu cpu; Xbyak::util::Cpu cpu;
Xbyak::util::Cpu::Type type; Xbyak::util::Cpu::Type type;
const char* instruction_set = "";
#if _M_SSE >= 0x500 #if _M_SSE >= 0x501
type = Xbyak::util::Cpu::tAVX2;
instruction_set = "AVX2";
#elif _M_SSE >= 0x500
type = Xbyak::util::Cpu::tAVX; type = Xbyak::util::Cpu::tAVX;
instruction_set = "AVX";
#elif _M_SSE >= 0x402 #elif _M_SSE >= 0x402
type = Xbyak::util::Cpu::tSSE42; type = Xbyak::util::Cpu::tSSE42;
instruction_set = "SSE4.2";
#elif _M_SSE >= 0x401 #elif _M_SSE >= 0x401
type = Xbyak::util::Cpu::tSSE41; type = Xbyak::util::Cpu::tSSE41;
instruction_set = "SSE4.1";
#elif _M_SSE >= 0x301 #elif _M_SSE >= 0x301
type = Xbyak::util::Cpu::tSSSE3; type = Xbyak::util::Cpu::tSSSE3;
instruction_set = "SSSE3";
#elif _M_SSE >= 0x200 #elif _M_SSE >= 0x200
type = Xbyak::util::Cpu::tSSE2; type = Xbyak::util::Cpu::tSSE2;
instruction_set = "SSE2";
#endif #endif
if(!cpu.has(type)) if(!cpu.has(type))
{ {
fprintf(stderr, "This CPU does not support SSE %d.%02d", _M_SSE >> 8, _M_SSE & 0xff); fprintf(stderr, "This CPU does not support %s\n", instruction_set);
return false; return false;
} }

View File

@ -22,148 +22,196 @@
#include "stdafx.h" #include "stdafx.h"
#include "GSVector.h" #include "GSVector.h"
const GSVector4i GSVector4i::m_xff[17] = GSVector4i GSVector4i::m_xff[17];
{ GSVector4i GSVector4i::m_x0f[17];
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
const GSVector4i GSVector4i::m_x0f[17] = void GSVector4i::InitVectors()
{ {
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector4i xff[17] =
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000), {
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
}; GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f); GSVector4i x0f[17] =
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f); {
const GSVector4 GSVector4::m_half(0.5f); GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_one(1.0f); GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_two(2.0f); GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_four(4.0f); GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000))); GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000))); GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_max(FLT_MAX); GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
const GSVector4 GSVector4::m_min(FLT_MIN); GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
};
for (size_t n = 0; n < countof(xff); ++n)
m_xff[n] = xff[n];
for (size_t n = 0; n < countof(x0f); ++n)
m_x0f[n] = x0f[n];
}
GSVector4 GSVector4::m_ps0123;
GSVector4 GSVector4::m_ps4567;
GSVector4 GSVector4::m_half;
GSVector4 GSVector4::m_one;
GSVector4 GSVector4::m_two;
GSVector4 GSVector4::m_four;
GSVector4 GSVector4::m_x4b000000;
GSVector4 GSVector4::m_x4f800000;
GSVector4 GSVector4::m_max;
GSVector4 GSVector4::m_min;
void GSVector4::InitVectors()
{
m_ps0123 = GSVector4(0.0f, 1.0f, 2.0f, 3.0f);
m_ps4567 = GSVector4(4.0f, 5.0f, 6.0f, 7.0f);
m_half = GSVector4(0.5f);
m_one = GSVector4(1.0f);
m_two = GSVector4(2.0f);
m_four = GSVector4(4.0f);
m_x4b000000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
m_x4f800000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
m_max = GSVector4(FLT_MAX);
m_min = GSVector4(FLT_MIN);
}
#if _M_SSE >= 0x500 #if _M_SSE >= 0x500
const GSVector8 GSVector8::m_half(0.5f); GSVector8 GSVector8::m_half;
const GSVector8 GSVector8::m_one(1.0f); GSVector8 GSVector8::m_one;
const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff))); GSVector8 GSVector8::m_x7fffffff;
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000))); GSVector8 GSVector8::m_x80000000;
const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000))); GSVector8 GSVector8::m_x4b000000;
const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000))); GSVector8 GSVector8::m_x4f800000;
const GSVector8 GSVector8::m_max(FLT_MAX); GSVector8 GSVector8::m_max;
const GSVector8 GSVector8::m_min(FLT_MIN); GSVector8 GSVector8::m_min;
void GSVector8::InitVectors()
{
m_half = GSVector8(0.5f);
m_one = GSVector8(1.0f);
m_x7fffffff = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
m_x80000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
m_x4b000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
m_x4f800000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
m_max = GSVector8(FLT_MAX);
m_min = GSVector8(FLT_MIN);
}
#endif #endif
#if _M_SSE >= 0x501 #if _M_SSE >= 0x501
GSVector8i GSVector8i::m_xff[33];
GSVector8i GSVector8i::m_x0f[33];
const GSVector8i GSVector8i::m_xff[33] = void GSVector8i::InitVectors()
{ {
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i xff[33] =
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), {
GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff), GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
}; GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
const GSVector8i GSVector8i::m_x0f[33] = GSVector8i x0f[33] =
{ {
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f), GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
}; };
for (size_t n = 0; n < countof(xff); ++n)
m_xff[n] = xff[n];
for (size_t n = 0; n < countof(x0f); ++n)
m_x0f[n] = x0f[n];
}
#endif #endif
GSVector4i GSVector4i::fit(int arx, int ary) const GSVector4i GSVector4i::fit(int arx, int ary) const

View File

@ -92,8 +92,8 @@ class GSVector8i;
class alignas(16) GSVector4i class alignas(16) GSVector4i
{ {
static const GSVector4i m_xff[17]; static GSVector4i m_xff[17];
static const GSVector4i m_x0f[17]; static GSVector4i m_x0f[17];
public: public:
union union
@ -114,6 +114,8 @@ public:
__m128i m; __m128i m;
}; };
static void InitVectors();
__forceinline GSVector4i() __forceinline GSVector4i()
{ {
} }
@ -2442,16 +2444,18 @@ public:
__m128 m; __m128 m;
}; };
static const GSVector4 m_ps0123; static GSVector4 m_ps0123;
static const GSVector4 m_ps4567; static GSVector4 m_ps4567;
static const GSVector4 m_half; static GSVector4 m_half;
static const GSVector4 m_one; static GSVector4 m_one;
static const GSVector4 m_two; static GSVector4 m_two;
static const GSVector4 m_four; static GSVector4 m_four;
static const GSVector4 m_x4b000000; static GSVector4 m_x4b000000;
static const GSVector4 m_x4f800000; static GSVector4 m_x4f800000;
static const GSVector4 m_max; static GSVector4 m_max;
static const GSVector4 m_min; static GSVector4 m_min;
static void InitVectors();
__forceinline GSVector4() __forceinline GSVector4()
{ {
@ -3343,8 +3347,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
class alignas(32) GSVector8i class alignas(32) GSVector8i
{ {
static const GSVector8i m_xff[33]; static GSVector8i m_xff[33];
static const GSVector8i m_x0f[33]; static GSVector8i m_x0f[33];
public: public:
union union
@ -3365,6 +3369,8 @@ public:
__m128i m0, m1; __m128i m0, m1;
}; };
static void InitVectors();
__forceinline GSVector8i() {} __forceinline GSVector8i() {}
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true); __forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
@ -5154,14 +5160,16 @@ public:
__m128 m0, m1; __m128 m0, m1;
}; };
static const GSVector8 m_half; static GSVector8 m_half;
static const GSVector8 m_one; static GSVector8 m_one;
static const GSVector8 m_x7fffffff; static GSVector8 m_x7fffffff;
static const GSVector8 m_x80000000; static GSVector8 m_x80000000;
static const GSVector8 m_x4b000000; static GSVector8 m_x4b000000;
static const GSVector8 m_x4f800000; static GSVector8 m_x4f800000;
static const GSVector8 m_max; static GSVector8 m_max;
static const GSVector8 m_min; static GSVector8 m_min;
static void InitVectors();
__forceinline GSVector8() __forceinline GSVector8()
{ {

View File

@ -24,7 +24,12 @@
#include "GSUtil.h" #include "GSUtil.h"
#include "GSState.h" #include "GSState.h"
const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX); GSVector4 GSVertexTrace::s_minmax;
void GSVertexTrace::InitVectors()
{
s_minmax = GSVector4(FLT_MAX, -FLT_MAX);
}
GSVertexTrace::GSVertexTrace(const GSState* state) GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state) : m_state(state)

View File

@ -38,7 +38,7 @@ public:
protected: protected:
const GSState* m_state; const GSState* m_state;
static const GSVector4 s_minmax; static GSVector4 s_minmax;
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count); typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
@ -69,6 +69,8 @@ public:
GSVector2 m_lod; // x = min, y = max GSVector2 m_lod; // x = min, y = max
public: public:
static void InitVectors();
GSVertexTrace(const GSState* state); GSVertexTrace(const GSState* state);
virtual ~GSVertexTrace() {} virtual ~GSVertexTrace() {}

View File

@ -127,6 +127,21 @@ GSdxApp theApp;
GSdxApp::GSdxApp() GSdxApp::GSdxApp()
{ {
// Empty constructor causes an illegal instruction exception on an SSE4.2 machine on Windows.
// Non-empty doesn't, but raises a SIGILL signal when compiled against GCC 6.1.1.
// So here's a compromise.
#ifdef _WIN32
Init();
#endif
}
void GSdxApp::Init()
{
static bool is_initialised = false;
if (is_initialised)
return;
is_initialised = true;
m_ini = "inis/GSdx.ini"; m_ini = "inis/GSdx.ini";
m_section = "Settings"; m_section = "Settings";

View File

@ -35,7 +35,8 @@ class GSdxApp
public: public:
GSdxApp(); GSdxApp();
void* GetModuleHandlePtr(); void Init();
void* GetModuleHandlePtr();
#ifdef _WIN32 #ifdef _WIN32
HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();} HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();}

View File

@ -57,8 +57,33 @@ EXPORT_C_(uint32) PSEgetLibVersion()
return version << 16 | revision << 8 | PLUGIN_VERSION; return version << 16 | revision << 8 | PLUGIN_VERSION;
} }
// Runs the deferred vector-constant initialisers needed by the GPU plugin.
// File-local helper; GPUinit() calls it after GSUtil::CheckSSE() has passed.
static void InitVectors()
{
// Constants owned by the vector classes themselves.
GSVector4i::InitVectors();
GSVector4::InitVectors();
// The 256-bit classes are only initialised when the build's _M_SSE level
// is high enough (same guards as their definitions).
#if _M_SSE >= 0x500
GSVector8::InitVectors();
#endif
#if _M_SSE >= 0x501
GSVector8i::InitVectors();
#endif
// Constants owned by the GPU plugin classes.
// NOTE(review): these build GSVector4/GSVector4i values; keep them after the
// vector classes unless the order is confirmed not to matter.
GPUDrawScanlineCodeGenerator::InitVectors();
GPULocalMemory::InitVectors();
GPUSetupPrimCodeGenerator::InitVectors();
}
EXPORT_C_(int32) GPUinit() EXPORT_C_(int32) GPUinit()
{ {
if(!GSUtil::CheckSSE())
{
return -1;
}
theApp.Init();
InitVectors();
return 0; return 0;
} }
@ -140,6 +165,13 @@ EXPORT_C_(int32) GPUopen(void* hWnd)
EXPORT_C_(int32) GPUconfigure() EXPORT_C_(int32) GPUconfigure()
{ {
if(!GSUtil::CheckSSE())
{
return -1;
}
theApp.Init();
#ifdef _WIN32 #ifdef _WIN32
GPUSettingsDlg dlg; GPUSettingsDlg dlg;
@ -160,6 +192,11 @@ EXPORT_C_(int32) GPUconfigure()
EXPORT_C_(int32) GPUtest() EXPORT_C_(int32) GPUtest()
{ {
if(!GSUtil::CheckSSE())
{
return -1;
}
return 0; return 0;
} }

View File

@ -1010,18 +1010,8 @@ void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm&
movdqa(a, b); movdqa(a, b);
} }
const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
{
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000),
GSVector4i::zero(),
};
GSVector4i GPUDrawScanlineCodeGenerator::m_test[8];
alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] = alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
{ {
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1}, {7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
@ -1029,3 +1019,21 @@ alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7}, {1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2}, {4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
}; };
void GPUDrawScanlineCodeGenerator::InitVectors()
{
GSVector4i test[8] =
{
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000),
GSVector4i::zero(),
};
for (size_t n = 0; n < countof(test); ++n)
m_test[n] = test[n];
}

View File

@ -55,6 +55,8 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
public: public:
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
static const GSVector4i m_test[8]; static GSVector4i m_test[8];
alignas(32) static const uint16 m_dither[4][16]; alignas(32) static const uint16 m_dither[4][16];
static void InitVectors();
}; };

View File

@ -23,15 +23,23 @@
#include "GPULocalMemory.h" #include "GPULocalMemory.h"
#include "GSdx.h" #include "GSdx.h"
const GSVector4i GPULocalMemory::m_xxxa(0x00008000); GSVector4i GPULocalMemory::m_xxxa;
const GSVector4i GPULocalMemory::m_xxbx(0x00007c00); GSVector4i GPULocalMemory::m_xxbx;
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0); GSVector4i GPULocalMemory::m_xgxx;
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f); GSVector4i GPULocalMemory::m_rxxx;
#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16)) #define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16))
#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2) #define VM_ALLOC_SIZE (VM_REAL_SIZE * 2)
#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32) #define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32)
// Assigns the GPULocalMemory bit-mask constants at run time rather than
// through static initialisers, so that no vector instructions execute while
// the library is being loaded.
//
// The four masks are disjoint and together cover bits 0-15. Going by the
// member names they presumably select the red, green, blue and alpha fields
// of a 16-bit pixel - confirm against the code that uses them.
void GPULocalMemory::InitVectors()
{
	m_rxxx = GSVector4i(0x0000001f); // bits 0-4
	m_xgxx = GSVector4i(0x000003e0); // bits 5-9
	m_xxbx = GSVector4i(0x00007c00); // bits 10-14
	m_xxxa = GSVector4i(0x00008000); // bit 15
}
GPULocalMemory::GPULocalMemory() GPULocalMemory::GPULocalMemory()
{ {
m_scale.x = std::min<int>(std::max<int>(theApp.GetConfigI("scale_x"), 0), 2); m_scale.x = std::min<int>(std::max<int>(theApp.GetConfigI("scale_x"), 0), 2);

View File

@ -26,10 +26,10 @@
class GPULocalMemory class GPULocalMemory
{ {
static const GSVector4i m_xxxa; static GSVector4i m_xxxa;
static const GSVector4i m_xxbx; static GSVector4i m_xxbx;
static const GSVector4i m_xgxx; static GSVector4i m_xgxx;
static const GSVector4i m_rxxx; static GSVector4i m_rxxx;
uint16* m_vm; uint16* m_vm;
@ -50,6 +50,8 @@ class GPULocalMemory
GSVector2i m_scale; GSVector2i m_scale;
public: public:
static void InitVectors();
GPULocalMemory(); GPULocalMemory();
virtual ~GPULocalMemory(); virtual ~GPULocalMemory();

View File

@ -220,9 +220,17 @@ void GPUSetupPrimCodeGenerator::Generate()
ret(); ret();
} }
const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] = GSVector4 GPUSetupPrimCodeGenerator::m_shift[3];
void GPUSetupPrimCodeGenerator::InitVectors()
{ {
GSVector4(8.0f, 8.0f, 8.0f, 8.0f), GSVector4 shift[3] =
GSVector4(0.0f, 1.0f, 2.0f, 3.0f), {
GSVector4(4.0f, 5.0f, 6.0f, 7.0f), GSVector4(8.0f, 8.0f, 8.0f, 8.0f),
}; GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
GSVector4(4.0f, 5.0f, 6.0f, 7.0f),
};
for (size_t n = 0; n < countof(shift); ++n)
m_shift[n] = shift[n];
}

View File

@ -36,5 +36,7 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
public: public:
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize); GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
static const GSVector4 m_shift[3]; static GSVector4 m_shift[3];
};
static void InitVectors();
};