Merge pull request #1477 from turtleli/gsdx-defer-init

gsdx: Avoid illegal instruction crash on older CPUs
This commit is contained in:
Jonathan Li 2016-08-02 23:00:19 +01:00 committed by GitHub
commit f978f9a07d
29 changed files with 611 additions and 328 deletions

View File

@ -123,6 +123,30 @@ EXPORT_C_(int) GSinit()
return -1;
}
// Vector instructions must be avoided when initialising GSdx since PCSX2
// can crash if the CPU does not support the instruction set.
// Initialise it here instead - it's not ideal since we have to strip the
// const type qualifier from all the affected variables.
theApp.Init();
GSBlock::InitVectors();
GSClut::InitVectors();
GSDrawScanlineCodeGenerator::InitVectors();
#ifdef ENABLE_OPENCL
GSRendererCL::InitVectors();
#endif
GSRendererSW::InitVectors();
GSSetupPrimCodeGenerator::InitVectors();
GSVector4i::InitVectors();
GSVector4::InitVectors();
#if _M_SSE >= 0x500
GSVector8::InitVectors();
#endif
#if _M_SSE >= 0x501
GSVector8i::InitVectors();
#endif
GSVertexTrace::InitVectors();
#ifdef _WIN32
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
@ -793,6 +817,8 @@ EXPORT_C GSconfigure()
{
if(!GSUtil::CheckSSE()) return;
theApp.Init();
#ifdef _WIN32
GSDialog::InitCommonControls();
if(GSSettingsDlg().DoModal() == IDOK)

View File

@ -23,26 +23,54 @@
#include "GSBlock.h"
#if _M_SSE >= 0x501
const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
GSVector8i GSBlock::m_r16mask;
#else
const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
GSVector4i GSBlock::m_r16mask;
#endif
const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
GSVector4i GSBlock::m_r8mask;
GSVector4i GSBlock::m_r4mask;
#if _M_SSE >= 0x501
const GSVector8i GSBlock::m_xxxa(0x00008000);
const GSVector8i GSBlock::m_xxbx(0x00007c00);
const GSVector8i GSBlock::m_xgxx(0x000003e0);
const GSVector8i GSBlock::m_rxxx(0x0000001f);
GSVector8i GSBlock::m_xxxa;
GSVector8i GSBlock::m_xxbx;
GSVector8i GSBlock::m_xgxx;
GSVector8i GSBlock::m_rxxx;
#else
const GSVector4i GSBlock::m_xxxa(0x00008000);
const GSVector4i GSBlock::m_xxbx(0x00007c00);
const GSVector4i GSBlock::m_xgxx(0x000003e0);
const GSVector4i GSBlock::m_rxxx(0x0000001f);
GSVector4i GSBlock::m_xxxa;
GSVector4i GSBlock::m_xxbx;
GSVector4i GSBlock::m_xgxx;
GSVector4i GSBlock::m_rxxx;
#endif
const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
const GSVector4i GSBlock::m_uw8hmask3(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
GSVector4i GSBlock::m_uw8hmask0;
GSVector4i GSBlock::m_uw8hmask1;
GSVector4i GSBlock::m_uw8hmask2;
GSVector4i GSBlock::m_uw8hmask3;
void GSBlock::InitVectors()
{
#if _M_SSE >= 0x501
m_r16mask = GSVector8i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
#else
m_r16mask = GSVector4i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
#endif
m_r8mask = GSVector4i(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
m_r4mask = GSVector4i(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
#if _M_SSE >= 0x501
m_xxxa = GSVector8i(0x00008000);
m_xxbx = GSVector8i(0x00007c00);
m_xgxx = GSVector8i(0x000003e0);
m_rxxx = GSVector8i(0x0000001f);
#else
m_xxxa = GSVector4i(0x00008000);
m_xxbx = GSVector4i(0x00007c00);
m_xgxx = GSVector4i(0x000003e0);
m_rxxx = GSVector4i(0x0000001f);
#endif
m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
}

View File

@ -28,31 +28,33 @@
class GSBlock
{
#if _M_SSE >= 0x501
static const GSVector8i m_r16mask;
static GSVector8i m_r16mask;
#else
static const GSVector4i m_r16mask;
static GSVector4i m_r16mask;
#endif
static const GSVector4i m_r8mask;
static const GSVector4i m_r4mask;
static GSVector4i m_r8mask;
static GSVector4i m_r4mask;
#if _M_SSE >= 0x501
static const GSVector8i m_xxxa;
static const GSVector8i m_xxbx;
static const GSVector8i m_xgxx;
static const GSVector8i m_rxxx;
static GSVector8i m_xxxa;
static GSVector8i m_xxbx;
static GSVector8i m_xgxx;
static GSVector8i m_rxxx;
#else
static const GSVector4i m_xxxa;
static const GSVector4i m_xxbx;
static const GSVector4i m_xgxx;
static const GSVector4i m_rxxx;
static GSVector4i m_xxxa;
static GSVector4i m_xxbx;
static GSVector4i m_xgxx;
static GSVector4i m_rxxx;
#endif
static const GSVector4i m_uw8hmask0;
static const GSVector4i m_uw8hmask1;
static const GSVector4i m_uw8hmask2;
static const GSVector4i m_uw8hmask3;
static GSVector4i m_uw8hmask0;
static GSVector4i m_uw8hmask1;
static GSVector4i m_uw8hmask2;
static GSVector4i m_uw8hmask3;
public:
static void InitVectors();
template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
{
const uint8* RESTRICT s0 = &src[srcpitch * 0];

View File

@ -682,17 +682,24 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
// TODO
static const GSVector4i s_bm(0x00007c00);
static const GSVector4i s_gm(0x000003e0);
static const GSVector4i s_rm(0x0000001f);
GSVector4i GSClut::m_bm;
GSVector4i GSClut::m_gm;
GSVector4i GSClut::m_rm;
// Assigns the three static 5-bit field masks that GSClut::Expand16() copies
// into its local rm/gm/bm vectors. Must run before Expand16() is used;
// GSinit() calls it during its deferred vector initialisation.
void GSClut::InitVectors()
{
	m_rm = GSVector4i(0x0000001f); // bits 0-4
	m_gm = GSVector4i(0x000003e0); // bits 5-9
	m_bm = GSVector4i(0x00007c00); // bits 10-14
}
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
{
ASSERT((w & 7) == 0);
const GSVector4i rm = s_rm;
const GSVector4i gm = s_gm;
const GSVector4i bm = s_bm;
const GSVector4i rm = m_rm;
const GSVector4i gm = m_gm;
const GSVector4i bm = m_bm;
GSVector4i TA0(TEXA.TA0 << 24);
GSVector4i TA1(TEXA.TA1 << 24);

View File

@ -30,6 +30,10 @@ class GSLocalMemory;
class alignas(32) GSClut : public GSAlignedClass<32>
{
static GSVector4i m_bm;
static GSVector4i m_gm;
static GSVector4i m_rm;
GSLocalMemory* m_mem;
uint32 m_CBP[2];
@ -93,6 +97,8 @@ class alignas(32) GSClut : public GSAlignedClass<32>
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
public:
static void InitVectors();
GSClut(GSLocalMemory* mem);
virtual ~GSClut();

View File

@ -516,7 +516,7 @@ CRC::Game CRC::m_games[] =
{0x06A7506A, SacredBlaze, JP, 0},
};
hash_map<uint32, CRC::Game*> CRC::m_map;
map<uint32, CRC::Game*> CRC::m_map;
string ToLower( string str )
{
@ -563,7 +563,7 @@ CRC::Game CRC::Lookup(uint32 crc)
printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups);
}
hash_map<uint32, Game*>::iterator i = m_map.find(crc);
auto i = m_map.find(crc);
if(i != m_map.end())
{

View File

@ -211,7 +211,7 @@ public:
private:
static Game m_games[];
static hash_map<uint32, Game*> m_map;
static map<uint32, Game*> m_map;
public:
static Game Lookup(uint32 crc);

View File

@ -44,7 +44,16 @@ alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] =
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
};
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4];
#else
GSVector4i GSDrawScanlineCodeGenerator::m_test[8];
GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4];
#endif
void GSDrawScanlineCodeGenerator::InitVectors()
{
#if _M_SSE >= 0x501
GSVector8 log2_coef[4] =
{
GSVector8(0.204446009836232697516f),
GSVector8(-1.04913055217340124191f),
@ -52,9 +61,11 @@ const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
GSVector8(1.0f),
};
#else
for (size_t n = 0; n < countof(log2_coef); ++n)
m_log2_coef[n] = log2_coef[n];
const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
#else
GSVector4i test[8] =
{
GSVector4i::zero(),
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
@ -66,7 +77,7 @@ const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
GSVector4i::zero(),
};
const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
GSVector4 log2_coef[4] =
{
GSVector4(0.204446009836232697516f),
GSVector4(-1.04913055217340124191f),
@ -74,7 +85,14 @@ const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
GSVector4(1.0f),
};
for (size_t n = 0; n < countof(test); ++n)
m_test[n] = test[n];
for (size_t n = 0; n < countof(log2_coef); ++n)
m_log2_coef[n] = log2_coef[n];
#endif
}
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)

View File

@ -136,10 +136,11 @@ public:
#if _M_SSE >= 0x501
alignas(8) static const uint8 m_test[16][8];
static const GSVector8 m_log2_coef[4];
static GSVector8 m_log2_coef[4];
#else
static const GSVector4i m_test[8];
static const GSVector4 m_log2_coef[4];
static GSVector4i m_test[8];
static GSVector4 m_log2_coef[4];
#endif
static void InitVectors();
};

View File

@ -74,6 +74,13 @@ typedef struct
#pragma pack(pop)
// Storage for the per-component scale that ConvertVertexBuffer() multiplies
// vertex positions by.
//
// The `static` keyword belongs only on the in-class declaration; repeating it
// on this out-of-class definition is ill-formed and breaks ENABLE_OPENCL
// builds, so it is deliberately absent here.
GSVector4 GSRendererCL::m_pos_scale;

// Assigns m_pos_scale. The value is set at run time (from GSinit(), when
// ENABLE_OPENCL is defined) rather than by a static initialiser so that no
// vector instruction executes while the library is being loaded.
void GSRendererCL::InitVectors()
{
	m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
}
GSRendererCL::GSRendererCL()
: m_vb_count(0)
, m_synced(true)
@ -200,8 +207,6 @@ GSTexture* GSRendererCL::GetOutput(int i, int& y_offset)
return m_texture[i];
}
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
template<uint32 primclass, uint32 tme, uint32 fst>
void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
{
@ -214,7 +219,7 @@ void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex*
GSVector4i xyzuvf(src->m[1]);
dst->p = (GSVector4(xyzuvf.upl16() - o) * g_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
dst->p = (GSVector4(xyzuvf.upl16() - o) * m_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
GSVector4 t = GSVector4::zero();

View File

@ -32,6 +32,8 @@ struct alignas(32) GSVertexCL
class GSRendererCL : public GSRenderer
{
static GSVector4 m_pos_scale;
typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
ConvertVertexBufferPtr m_cvb[4][2][2];
@ -261,6 +263,8 @@ protected:
bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);
public:
static void InitVectors();
GSRendererCL();
virtual ~GSRendererCL();
};

View File

@ -26,11 +26,19 @@
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSVector4 GSRendererSW::m_pos_scale;
#if _M_SSE >= 0x501
GSVector8 GSRendererSW::m_pos_scale2;
#endif
// Assigns the static position scale vectors used by ConvertVertexBuffer().
//
// m_pos_scale is always set; m_pos_scale2 holds the same four values repeated
// twice in a 256-bit vector and only exists on AVX2 builds (_M_SSE >= 0x501).
// GSinit() calls this during its deferred vector initialisation.
void GSRendererSW::InitVectors()
{
	m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);

#if _M_SSE >= 0x501
	// Only the member is assigned here: a function-local g_pos_scale2 copy of
	// the same value would never be read.
	m_pos_scale2 = GSVector8(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
#endif
}
GSRendererSW::GSRendererSW(int threads)
: m_fzb(NULL)
@ -294,7 +302,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
GSVector8i xy = xyzuvf.upl16() - o2;
GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * g_pos_scale2;
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * m_pos_scale2;
GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7);
GSVector8 t = GSVector8::zero();
@ -364,7 +372,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
#endif
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * m_pos_scale;
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
GSVector4 t = GSVector4::zero();

View File

@ -27,6 +27,11 @@
class GSRendererSW : public GSRenderer
{
static GSVector4 m_pos_scale;
#if _M_SSE >= 0x501
static GSVector8 m_pos_scale2;
#endif
class SharedData : public GSDrawScanline::SharedData
{
struct alignas(16) TextureLevel
@ -95,6 +100,8 @@ protected:
bool GetScanlineGlobalData(SharedData* data);
public:
static void InitVectors();
GSRendererSW(int threads);
virtual ~GSRendererSW();
};

View File

@ -23,8 +23,15 @@
#include "GSSetupPrimCodeGenerator.h"
#if _M_SSE >= 0x501
GSVector8 GSSetupPrimCodeGenerator::m_shift[9];
#else
GSVector4 GSSetupPrimCodeGenerator::m_shift[5];
#endif
const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] =
void GSSetupPrimCodeGenerator::InitVectors()
{
#if _M_SSE >= 0x501
GSVector8 shift[9] =
{
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
@ -37,9 +44,11 @@ const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] =
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
};
#else
for (size_t n = 0; n < countof(shift); ++n)
m_shift[n] = shift[n];
const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
#else
GSVector4 shift[5] =
{
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
@ -48,7 +57,10 @@ const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
};
for (size_t n = 0; n < countof(shift); ++n)
m_shift[n] = shift[n];
#endif
}
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)

View File

@ -43,8 +43,10 @@ public:
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
#if _M_SSE >= 0x501
static const GSVector8 m_shift[9];
static GSVector8 m_shift[9];
#else
static const GSVector4 m_shift[5];
static GSVector4 m_shift[5];
#endif
static void InitVectors();
};

View File

@ -35,58 +35,60 @@
const char* GSUtil::GetLibName()
{
// TODO: critsec
// The following ifdef mess is courtesy of "static string str;"
// being optimised by GCC to be unusable by older CPUs. Enjoy!
static char name[255];
static string str;
if(str.empty())
{
str = "GSdx";
snprintf(name, sizeof(name), "GSdx "
#ifdef _WIN32
str += format(" %lld", SVN_REV);
if(SVN_MODS) str += "m";
"%lld "
#endif
#ifdef _M_AMD64
str += " 64-bit";
"64-bit "
#endif
list<string> sl;
#ifdef __INTEL_COMPILER
sl.push_back(format("Intel C++ %d.%02d", __INTEL_COMPILER / 100, __INTEL_COMPILER % 100));
"(Intel C++ %d.%02d %s)",
#elif _MSC_VER
sl.push_back(format("MSVC %d.%02d", _MSC_VER / 100, _MSC_VER % 100));
"(MSVC %d.%02d %s)",
#elif __clang__
"(clang %d.%d.%d %s)",
#elif __GNUC__
sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__));
"(GCC %d.%d.%d %s)",
#else
"(%s)",
#endif
#ifdef _WIN32
SVN_REV,
#endif
#ifdef __INTEL_COMPILER
__INTEL_COMPILER / 100, __INTEL_COMPILER % 100,
#elif _MSC_VER
_MSC_VER / 100, _MSC_VER % 100,
#elif __clang__
__clang_major__, __clang_minor__, __clang_patchlevel__,
#elif __GNUC__
__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
#endif
#if _M_SSE >= 0x501
sl.push_back("AVX2");
"AVX2"
#elif _M_SSE >= 0x500
sl.push_back("AVX");
"AVX"
#elif _M_SSE >= 0x402
sl.push_back("SSE42");
"SSE4.2"
#elif _M_SSE >= 0x401
sl.push_back("SSE41");
"SSE4.1"
#elif _M_SSE >= 0x301
sl.push_back("SSSE3");
"SSSE3"
#elif _M_SSE >= 0x200
sl.push_back("SSE2");
"SSE2"
#elif _M_SSE >= 0x100
sl.push_back("SSE");
"SSE"
#endif
);
for(list<string>::iterator i = sl.begin(); i != sl.end(); )
{
if(i == sl.begin()) str += " (";
str += *i;
str += ++i != sl.end() ? ", " : ")";
}
}
return str.c_str();
return name;
}
static class GSUtilMaps
@ -203,22 +205,31 @@ bool GSUtil::CheckSSE()
{
Xbyak::util::Cpu cpu;
Xbyak::util::Cpu::Type type;
const char* instruction_set = "";
#if _M_SSE >= 0x500
#if _M_SSE >= 0x501
type = Xbyak::util::Cpu::tAVX2;
instruction_set = "AVX2";
#elif _M_SSE >= 0x500
type = Xbyak::util::Cpu::tAVX;
instruction_set = "AVX";
#elif _M_SSE >= 0x402
type = Xbyak::util::Cpu::tSSE42;
instruction_set = "SSE4.2";
#elif _M_SSE >= 0x401
type = Xbyak::util::Cpu::tSSE41;
instruction_set = "SSE4.1";
#elif _M_SSE >= 0x301
type = Xbyak::util::Cpu::tSSSE3;
instruction_set = "SSSE3";
#elif _M_SSE >= 0x200
type = Xbyak::util::Cpu::tSSE2;
instruction_set = "SSE2";
#endif
if(!cpu.has(type))
{
fprintf(stderr, "This CPU does not support SSE %d.%02d", _M_SSE >> 8, _M_SSE & 0xff);
fprintf(stderr, "This CPU does not support %s\n", instruction_set);
return false;
}

View File

@ -22,7 +22,12 @@
#include "stdafx.h"
#include "GSVector.h"
const GSVector4i GSVector4i::m_xff[17] =
GSVector4i GSVector4i::m_xff[17];
GSVector4i GSVector4i::m_x0f[17];
void GSVector4i::InitVectors()
{
GSVector4i xff[17] =
{
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
@ -43,7 +48,7 @@ const GSVector4i GSVector4i::m_xff[17] =
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
const GSVector4i GSVector4i::m_x0f[17] =
GSVector4i x0f[17] =
{
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
@ -64,33 +69,70 @@ const GSVector4i GSVector4i::m_x0f[17] =
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
};
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
const GSVector4 GSVector4::m_half(0.5f);
const GSVector4 GSVector4::m_one(1.0f);
const GSVector4 GSVector4::m_two(2.0f);
const GSVector4 GSVector4::m_four(4.0f);
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
const GSVector4 GSVector4::m_max(FLT_MAX);
const GSVector4 GSVector4::m_min(FLT_MIN);
for (size_t n = 0; n < countof(xff); ++n)
m_xff[n] = xff[n];
for (size_t n = 0; n < countof(x0f); ++n)
m_x0f[n] = x0f[n];
}
GSVector4 GSVector4::m_ps0123;
GSVector4 GSVector4::m_ps4567;
GSVector4 GSVector4::m_half;
GSVector4 GSVector4::m_one;
GSVector4 GSVector4::m_two;
GSVector4 GSVector4::m_four;
GSVector4 GSVector4::m_x4b000000;
GSVector4 GSVector4::m_x4f800000;
GSVector4 GSVector4::m_max;
GSVector4 GSVector4::m_min;
void GSVector4::InitVectors()
{
m_ps0123 = GSVector4(0.0f, 1.0f, 2.0f, 3.0f);
m_ps4567 = GSVector4(4.0f, 5.0f, 6.0f, 7.0f);
m_half = GSVector4(0.5f);
m_one = GSVector4(1.0f);
m_two = GSVector4(2.0f);
m_four = GSVector4(4.0f);
m_x4b000000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
m_x4f800000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
m_max = GSVector4(FLT_MAX);
m_min = GSVector4(FLT_MIN);
}
#if _M_SSE >= 0x500
const GSVector8 GSVector8::m_half(0.5f);
const GSVector8 GSVector8::m_one(1.0f);
const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
const GSVector8 GSVector8::m_max(FLT_MAX);
const GSVector8 GSVector8::m_min(FLT_MIN);
GSVector8 GSVector8::m_half;
GSVector8 GSVector8::m_one;
GSVector8 GSVector8::m_x7fffffff;
GSVector8 GSVector8::m_x80000000;
GSVector8 GSVector8::m_x4b000000;
GSVector8 GSVector8::m_x4f800000;
GSVector8 GSVector8::m_max;
GSVector8 GSVector8::m_min;
void GSVector8::InitVectors()
{
m_half = GSVector8(0.5f);
m_one = GSVector8(1.0f);
m_x7fffffff = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
m_x80000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
m_x4b000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
m_x4f800000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
m_max = GSVector8(FLT_MAX);
m_min = GSVector8(FLT_MIN);
}
#endif
#if _M_SSE >= 0x501
GSVector8i GSVector8i::m_xff[33];
GSVector8i GSVector8i::m_x0f[33];
const GSVector8i GSVector8i::m_xff[33] =
void GSVector8i::InitVectors()
{
GSVector8i xff[33] =
{
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
@ -127,7 +169,7 @@ const GSVector8i GSVector8i::m_xff[33] =
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
const GSVector8i GSVector8i::m_x0f[33] =
GSVector8i x0f[33] =
{
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
@ -164,6 +206,12 @@ const GSVector8i GSVector8i::m_x0f[33] =
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
};
for (size_t n = 0; n < countof(xff); ++n)
m_xff[n] = xff[n];
for (size_t n = 0; n < countof(x0f); ++n)
m_x0f[n] = x0f[n];
}
#endif
GSVector4i GSVector4i::fit(int arx, int ary) const

View File

@ -92,8 +92,8 @@ class GSVector8i;
class alignas(16) GSVector4i
{
static const GSVector4i m_xff[17];
static const GSVector4i m_x0f[17];
static GSVector4i m_xff[17];
static GSVector4i m_x0f[17];
public:
union
@ -114,6 +114,8 @@ public:
__m128i m;
};
static void InitVectors();
__forceinline GSVector4i()
{
}
@ -2442,16 +2444,18 @@ public:
__m128 m;
};
static const GSVector4 m_ps0123;
static const GSVector4 m_ps4567;
static const GSVector4 m_half;
static const GSVector4 m_one;
static const GSVector4 m_two;
static const GSVector4 m_four;
static const GSVector4 m_x4b000000;
static const GSVector4 m_x4f800000;
static const GSVector4 m_max;
static const GSVector4 m_min;
static GSVector4 m_ps0123;
static GSVector4 m_ps4567;
static GSVector4 m_half;
static GSVector4 m_one;
static GSVector4 m_two;
static GSVector4 m_four;
static GSVector4 m_x4b000000;
static GSVector4 m_x4f800000;
static GSVector4 m_max;
static GSVector4 m_min;
static void InitVectors();
__forceinline GSVector4()
{
@ -3343,8 +3347,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
class alignas(32) GSVector8i
{
static const GSVector8i m_xff[33];
static const GSVector8i m_x0f[33];
static GSVector8i m_xff[33];
static GSVector8i m_x0f[33];
public:
union
@ -3365,6 +3369,8 @@ public:
__m128i m0, m1;
};
static void InitVectors();
__forceinline GSVector8i() {}
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
@ -5154,14 +5160,16 @@ public:
__m128 m0, m1;
};
static const GSVector8 m_half;
static const GSVector8 m_one;
static const GSVector8 m_x7fffffff;
static const GSVector8 m_x80000000;
static const GSVector8 m_x4b000000;
static const GSVector8 m_x4f800000;
static const GSVector8 m_max;
static const GSVector8 m_min;
static GSVector8 m_half;
static GSVector8 m_one;
static GSVector8 m_x7fffffff;
static GSVector8 m_x80000000;
static GSVector8 m_x4b000000;
static GSVector8 m_x4f800000;
static GSVector8 m_max;
static GSVector8 m_min;
static void InitVectors();
__forceinline GSVector8()
{

View File

@ -24,7 +24,12 @@
#include "GSUtil.h"
#include "GSState.h"
const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
GSVector4 GSVertexTrace::s_minmax;
void GSVertexTrace::InitVectors()
{
s_minmax = GSVector4(FLT_MAX, -FLT_MAX);
}
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state)

View File

@ -38,7 +38,7 @@ public:
protected:
const GSState* m_state;
static const GSVector4 s_minmax;
static GSVector4 s_minmax;
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
@ -69,6 +69,8 @@ public:
GSVector2 m_lod; // x = min, y = max
public:
static void InitVectors();
GSVertexTrace(const GSState* state);
virtual ~GSVertexTrace() {}

View File

@ -127,6 +127,21 @@ GSdxApp theApp;
// Constructor of the application object (instantiated as the global theApp).
//
// The real set-up lives in Init(). Whether the constructor may call it is
// platform dependent:
//  - On Windows an empty constructor causes an illegal instruction exception
//    on an SSE4.2 machine, so the constructor must do something.
//  - When built with GCC 6.1.1 a non-empty constructor raises SIGILL instead.
// As a compromise only Windows builds call Init() from here; elsewhere the
// constructor stays empty and Init() is invoked explicitly (for example from
// GSinit() and GPUinit()).
GSdxApp::GSdxApp()
{
#if defined(_WIN32)
	Init();
#endif
}
void GSdxApp::Init()
{
static bool is_initialised = false;
if (is_initialised)
return;
is_initialised = true;
m_ini = "inis/GSdx.ini";
m_section = "Settings";

View File

@ -35,6 +35,7 @@ class GSdxApp
public:
GSdxApp();
void Init();
void* GetModuleHandlePtr();
#ifdef _WIN32

View File

@ -57,8 +57,33 @@ EXPORT_C_(uint32) PSEgetLibVersion()
return version << 16 | revision << 8 | PLUGIN_VERSION;
}
// Run-time initialisation of every static vector constant the GPU plugin
// entry points depend on. Called from GPUinit() after the CPU check passes.
static void InitVectors()
{
	// Shared vector classes.
	GSVector4i::InitVectors();
	GSVector4::InitVectors();

	// GSVector8 needs AVX (0x500); GSVector8i additionally needs AVX2 (0x501),
	// which implies the outer condition, so the guards can be nested.
#if _M_SSE >= 0x500
	GSVector8::InitVectors();
#if _M_SSE >= 0x501
	GSVector8i::InitVectors();
#endif
#endif

	// GPU-specific tables.
	GPUDrawScanlineCodeGenerator::InitVectors();
	GPULocalMemory::InitVectors();
	GPUSetupPrimCodeGenerator::InitVectors();
}
// Plugin initialisation entry point.
//
// Returns -1 without touching any state when GSUtil::CheckSSE() reports that
// the CPU lacks the instruction set this build requires. Otherwise it runs
// theApp.Init(), fills in the static vector constants and returns 0.
EXPORT_C_(int32) GPUinit()
{
	const bool cpu_supported = GSUtil::CheckSSE();

	if(cpu_supported)
	{
		// Safe to execute vector code from here on.
		theApp.Init();
		InitVectors();

		return 0;
	}

	return -1;
}
@ -140,6 +165,13 @@ EXPORT_C_(int32) GPUopen(void* hWnd)
EXPORT_C_(int32) GPUconfigure()
{
if(!GSUtil::CheckSSE())
{
return -1;
}
theApp.Init();
#ifdef _WIN32
GPUSettingsDlg dlg;
@ -160,6 +192,11 @@ EXPORT_C_(int32) GPUconfigure()
// Plugin self-test entry point: returns 0 when GSUtil::CheckSSE() succeeds
// and -1 when it fails.
EXPORT_C_(int32) GPUtest()
{
	return GSUtil::CheckSSE() ? 0 : -1;
}

View File

@ -1010,7 +1010,19 @@ void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm&
movdqa(a, b);
}
const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
GSVector4i GPUDrawScanlineCodeGenerator::m_test[8];
alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
{
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
};
void GPUDrawScanlineCodeGenerator::InitVectors()
{
GSVector4i test[8] =
{
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
@ -1022,10 +1034,6 @@ const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
GSVector4i::zero(),
};
alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
{
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
};
for (size_t n = 0; n < countof(test); ++n)
m_test[n] = test[n];
}

View File

@ -55,6 +55,8 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
public:
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
static const GSVector4i m_test[8];
static GSVector4i m_test[8];
alignas(32) static const uint16 m_dither[4][16];
static void InitVectors();
};

View File

@ -23,15 +23,23 @@
#include "GPULocalMemory.h"
#include "GSdx.h"
const GSVector4i GPULocalMemory::m_xxxa(0x00008000);
const GSVector4i GPULocalMemory::m_xxbx(0x00007c00);
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0);
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f);
GSVector4i GPULocalMemory::m_xxxa;
GSVector4i GPULocalMemory::m_xxbx;
GSVector4i GPULocalMemory::m_xgxx;
GSVector4i GPULocalMemory::m_rxxx;
#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16))
#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2)
#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32)
// Assigns the four static bit-field masks of GPULocalMemory. They are
// non-const statics, so they hold no meaningful value until this has run;
// the GPU plugin's InitVectors() helper calls it from GPUinit().
// NOTE(review): presumably the R/G/B/A fields of a 16-bit pixel - confirm.
void GPULocalMemory::InitVectors()
{
	m_rxxx = GSVector4i(0x0000001f); // bits 0-4
	m_xgxx = GSVector4i(0x000003e0); // bits 5-9
	m_xxbx = GSVector4i(0x00007c00); // bits 10-14
	m_xxxa = GSVector4i(0x00008000); // bit 15
}
GPULocalMemory::GPULocalMemory()
{
m_scale.x = std::min<int>(std::max<int>(theApp.GetConfigI("scale_x"), 0), 2);

View File

@ -26,10 +26,10 @@
class GPULocalMemory
{
static const GSVector4i m_xxxa;
static const GSVector4i m_xxbx;
static const GSVector4i m_xgxx;
static const GSVector4i m_rxxx;
static GSVector4i m_xxxa;
static GSVector4i m_xxbx;
static GSVector4i m_xgxx;
static GSVector4i m_rxxx;
uint16* m_vm;
@ -50,6 +50,8 @@ class GPULocalMemory
GSVector2i m_scale;
public:
static void InitVectors();
GPULocalMemory();
virtual ~GPULocalMemory();

View File

@ -220,9 +220,17 @@ void GPUSetupPrimCodeGenerator::Generate()
ret();
}
const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] =
GSVector4 GPUSetupPrimCodeGenerator::m_shift[3];
// Fills the static m_shift table at run time. Entry 0 is 8.0 in every lane;
// entries 1 and 2 hold the lane values 0..3 and 4..7.
void GPUSetupPrimCodeGenerator::InitVectors()
{
	m_shift[0] = GSVector4(8.0f, 8.0f, 8.0f, 8.0f);
	m_shift[1] = GSVector4(0.0f, 1.0f, 2.0f, 3.0f);
	m_shift[2] = GSVector4(4.0f, 5.0f, 6.0f, 7.0f);
}

View File

@ -36,5 +36,7 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
public:
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
static const GSVector4 m_shift[3];
static GSVector4 m_shift[3];
static void InitVectors();
};