mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #1477 from turtleli/gsdx-defer-init
gsdx: Avoid illegal instruction crash on older CPUs
This commit is contained in:
commit
f978f9a07d
|
@ -123,6 +123,30 @@ EXPORT_C_(int) GSinit()
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Vector instructions must be avoided when initialising GSdx since PCSX2
|
||||||
|
// can crash if the CPU does not support the instruction set.
|
||||||
|
// Initialise it here instead - it's not ideal since we have to strip the
|
||||||
|
// const type qualifier from all the affected variables.
|
||||||
|
theApp.Init();
|
||||||
|
|
||||||
|
GSBlock::InitVectors();
|
||||||
|
GSClut::InitVectors();
|
||||||
|
GSDrawScanlineCodeGenerator::InitVectors();
|
||||||
|
#ifdef ENABLE_OPENCL
|
||||||
|
GSRendererCL::InitVectors();
|
||||||
|
#endif
|
||||||
|
GSRendererSW::InitVectors();
|
||||||
|
GSSetupPrimCodeGenerator::InitVectors();
|
||||||
|
GSVector4i::InitVectors();
|
||||||
|
GSVector4::InitVectors();
|
||||||
|
#if _M_SSE >= 0x500
|
||||||
|
GSVector8::InitVectors();
|
||||||
|
#endif
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8i::InitVectors();
|
||||||
|
#endif
|
||||||
|
GSVertexTrace::InitVectors();
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
||||||
|
@ -793,6 +817,8 @@ EXPORT_C GSconfigure()
|
||||||
{
|
{
|
||||||
if(!GSUtil::CheckSSE()) return;
|
if(!GSUtil::CheckSSE()) return;
|
||||||
|
|
||||||
|
theApp.Init();
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
GSDialog::InitCommonControls();
|
GSDialog::InitCommonControls();
|
||||||
if(GSSettingsDlg().DoModal() == IDOK)
|
if(GSSettingsDlg().DoModal() == IDOK)
|
||||||
|
|
|
@ -23,26 +23,54 @@
|
||||||
#include "GSBlock.h"
|
#include "GSBlock.h"
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
GSVector8i GSBlock::m_r16mask;
|
||||||
#else
|
#else
|
||||||
const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
GSVector4i GSBlock::m_r16mask;
|
||||||
#endif
|
#endif
|
||||||
const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
|
GSVector4i GSBlock::m_r8mask;
|
||||||
const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
GSVector4i GSBlock::m_r4mask;
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
const GSVector8i GSBlock::m_xxxa(0x00008000);
|
GSVector8i GSBlock::m_xxxa;
|
||||||
const GSVector8i GSBlock::m_xxbx(0x00007c00);
|
GSVector8i GSBlock::m_xxbx;
|
||||||
const GSVector8i GSBlock::m_xgxx(0x000003e0);
|
GSVector8i GSBlock::m_xgxx;
|
||||||
const GSVector8i GSBlock::m_rxxx(0x0000001f);
|
GSVector8i GSBlock::m_rxxx;
|
||||||
#else
|
#else
|
||||||
const GSVector4i GSBlock::m_xxxa(0x00008000);
|
GSVector4i GSBlock::m_xxxa;
|
||||||
const GSVector4i GSBlock::m_xxbx(0x00007c00);
|
GSVector4i GSBlock::m_xxbx;
|
||||||
const GSVector4i GSBlock::m_xgxx(0x000003e0);
|
GSVector4i GSBlock::m_xgxx;
|
||||||
const GSVector4i GSBlock::m_rxxx(0x0000001f);
|
GSVector4i GSBlock::m_rxxx;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
|
GSVector4i GSBlock::m_uw8hmask0;
|
||||||
const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
|
GSVector4i GSBlock::m_uw8hmask1;
|
||||||
const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
|
GSVector4i GSBlock::m_uw8hmask2;
|
||||||
const GSVector4i GSBlock::m_uw8hmask3(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
|
GSVector4i GSBlock::m_uw8hmask3;
|
||||||
|
|
||||||
|
void GSBlock::InitVectors()
|
||||||
|
{
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
m_r16mask = GSVector8i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||||
|
#else
|
||||||
|
m_r16mask = GSVector4i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||||
|
#endif
|
||||||
|
m_r8mask = GSVector4i(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
|
||||||
|
m_r4mask = GSVector4i(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||||
|
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
m_xxxa = GSVector8i(0x00008000);
|
||||||
|
m_xxbx = GSVector8i(0x00007c00);
|
||||||
|
m_xgxx = GSVector8i(0x000003e0);
|
||||||
|
m_rxxx = GSVector8i(0x0000001f);
|
||||||
|
#else
|
||||||
|
m_xxxa = GSVector4i(0x00008000);
|
||||||
|
m_xxbx = GSVector4i(0x00007c00);
|
||||||
|
m_xgxx = GSVector4i(0x000003e0);
|
||||||
|
m_rxxx = GSVector4i(0x0000001f);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
|
||||||
|
m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
|
||||||
|
m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
|
||||||
|
m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
|
||||||
|
}
|
||||||
|
|
|
@ -28,31 +28,33 @@
|
||||||
class GSBlock
|
class GSBlock
|
||||||
{
|
{
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
static const GSVector8i m_r16mask;
|
static GSVector8i m_r16mask;
|
||||||
#else
|
#else
|
||||||
static const GSVector4i m_r16mask;
|
static GSVector4i m_r16mask;
|
||||||
#endif
|
#endif
|
||||||
static const GSVector4i m_r8mask;
|
static GSVector4i m_r8mask;
|
||||||
static const GSVector4i m_r4mask;
|
static GSVector4i m_r4mask;
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
static const GSVector8i m_xxxa;
|
static GSVector8i m_xxxa;
|
||||||
static const GSVector8i m_xxbx;
|
static GSVector8i m_xxbx;
|
||||||
static const GSVector8i m_xgxx;
|
static GSVector8i m_xgxx;
|
||||||
static const GSVector8i m_rxxx;
|
static GSVector8i m_rxxx;
|
||||||
#else
|
#else
|
||||||
static const GSVector4i m_xxxa;
|
static GSVector4i m_xxxa;
|
||||||
static const GSVector4i m_xxbx;
|
static GSVector4i m_xxbx;
|
||||||
static const GSVector4i m_xgxx;
|
static GSVector4i m_xgxx;
|
||||||
static const GSVector4i m_rxxx;
|
static GSVector4i m_rxxx;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
static const GSVector4i m_uw8hmask0;
|
static GSVector4i m_uw8hmask0;
|
||||||
static const GSVector4i m_uw8hmask1;
|
static GSVector4i m_uw8hmask1;
|
||||||
static const GSVector4i m_uw8hmask2;
|
static GSVector4i m_uw8hmask2;
|
||||||
static const GSVector4i m_uw8hmask3;
|
static GSVector4i m_uw8hmask3;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
||||||
{
|
{
|
||||||
const uint8* RESTRICT s0 = &src[srcpitch * 0];
|
const uint8* RESTRICT s0 = &src[srcpitch * 0];
|
||||||
|
|
|
@ -682,17 +682,24 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
|
||||||
|
|
||||||
// TODO
|
// TODO
|
||||||
|
|
||||||
static const GSVector4i s_bm(0x00007c00);
|
GSVector4i GSClut::m_bm;
|
||||||
static const GSVector4i s_gm(0x000003e0);
|
GSVector4i GSClut::m_gm;
|
||||||
static const GSVector4i s_rm(0x0000001f);
|
GSVector4i GSClut::m_rm;
|
||||||
|
|
||||||
|
void GSClut::InitVectors()
|
||||||
|
{
|
||||||
|
m_bm = GSVector4i(0x00007c00);
|
||||||
|
m_gm = GSVector4i(0x000003e0);
|
||||||
|
m_rm = GSVector4i(0x0000001f);
|
||||||
|
}
|
||||||
|
|
||||||
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
|
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
|
||||||
{
|
{
|
||||||
ASSERT((w & 7) == 0);
|
ASSERT((w & 7) == 0);
|
||||||
|
|
||||||
const GSVector4i rm = s_rm;
|
const GSVector4i rm = m_rm;
|
||||||
const GSVector4i gm = s_gm;
|
const GSVector4i gm = m_gm;
|
||||||
const GSVector4i bm = s_bm;
|
const GSVector4i bm = m_bm;
|
||||||
|
|
||||||
GSVector4i TA0(TEXA.TA0 << 24);
|
GSVector4i TA0(TEXA.TA0 << 24);
|
||||||
GSVector4i TA1(TEXA.TA1 << 24);
|
GSVector4i TA1(TEXA.TA1 << 24);
|
||||||
|
|
|
@ -30,6 +30,10 @@ class GSLocalMemory;
|
||||||
|
|
||||||
class alignas(32) GSClut : public GSAlignedClass<32>
|
class alignas(32) GSClut : public GSAlignedClass<32>
|
||||||
{
|
{
|
||||||
|
static GSVector4i m_bm;
|
||||||
|
static GSVector4i m_gm;
|
||||||
|
static GSVector4i m_rm;
|
||||||
|
|
||||||
GSLocalMemory* m_mem;
|
GSLocalMemory* m_mem;
|
||||||
|
|
||||||
uint32 m_CBP[2];
|
uint32 m_CBP[2];
|
||||||
|
@ -93,6 +97,8 @@ class alignas(32) GSClut : public GSAlignedClass<32>
|
||||||
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
|
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
GSClut(GSLocalMemory* mem);
|
GSClut(GSLocalMemory* mem);
|
||||||
virtual ~GSClut();
|
virtual ~GSClut();
|
||||||
|
|
||||||
|
|
|
@ -516,7 +516,7 @@ CRC::Game CRC::m_games[] =
|
||||||
{0x06A7506A, SacredBlaze, JP, 0},
|
{0x06A7506A, SacredBlaze, JP, 0},
|
||||||
};
|
};
|
||||||
|
|
||||||
hash_map<uint32, CRC::Game*> CRC::m_map;
|
map<uint32, CRC::Game*> CRC::m_map;
|
||||||
|
|
||||||
string ToLower( string str )
|
string ToLower( string str )
|
||||||
{
|
{
|
||||||
|
@ -563,7 +563,7 @@ CRC::Game CRC::Lookup(uint32 crc)
|
||||||
printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups);
|
printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups);
|
||||||
}
|
}
|
||||||
|
|
||||||
hash_map<uint32, Game*>::iterator i = m_map.find(crc);
|
auto i = m_map.find(crc);
|
||||||
|
|
||||||
if(i != m_map.end())
|
if(i != m_map.end())
|
||||||
{
|
{
|
||||||
|
|
|
@ -211,7 +211,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
static Game m_games[];
|
static Game m_games[];
|
||||||
static hash_map<uint32, Game*> m_map;
|
static map<uint32, Game*> m_map;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
static Game Lookup(uint32 crc);
|
static Game Lookup(uint32 crc);
|
||||||
|
|
|
@ -44,7 +44,16 @@ alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] =
|
||||||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||||
};
|
};
|
||||||
|
|
||||||
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4];
|
||||||
|
#else
|
||||||
|
GSVector4i GSDrawScanlineCodeGenerator::m_test[8];
|
||||||
|
GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4];
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void GSDrawScanlineCodeGenerator::InitVectors()
|
||||||
|
{
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8 log2_coef[4] =
|
||||||
{
|
{
|
||||||
GSVector8(0.204446009836232697516f),
|
GSVector8(0.204446009836232697516f),
|
||||||
GSVector8(-1.04913055217340124191f),
|
GSVector8(-1.04913055217340124191f),
|
||||||
|
@ -52,9 +61,11 @@ const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||||
GSVector8(1.0f),
|
GSVector8(1.0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
#else
|
for (size_t n = 0; n < countof(log2_coef); ++n)
|
||||||
|
m_log2_coef[n] = log2_coef[n];
|
||||||
|
|
||||||
const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
|
#else
|
||||||
|
GSVector4i test[8] =
|
||||||
{
|
{
|
||||||
GSVector4i::zero(),
|
GSVector4i::zero(),
|
||||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||||
|
@ -66,7 +77,7 @@ const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
|
||||||
GSVector4i::zero(),
|
GSVector4i::zero(),
|
||||||
};
|
};
|
||||||
|
|
||||||
const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
GSVector4 log2_coef[4] =
|
||||||
{
|
{
|
||||||
GSVector4(0.204446009836232697516f),
|
GSVector4(0.204446009836232697516f),
|
||||||
GSVector4(-1.04913055217340124191f),
|
GSVector4(-1.04913055217340124191f),
|
||||||
|
@ -74,7 +85,14 @@ const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||||
GSVector4(1.0f),
|
GSVector4(1.0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
for (size_t n = 0; n < countof(test); ++n)
|
||||||
|
m_test[n] = test[n];
|
||||||
|
|
||||||
|
for (size_t n = 0; n < countof(log2_coef); ++n)
|
||||||
|
m_log2_coef[n] = log2_coef[n];
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: GSCodeGenerator(code, maxsize)
|
||||||
|
|
|
@ -136,10 +136,11 @@ public:
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
alignas(8) static const uint8 m_test[16][8];
|
alignas(8) static const uint8 m_test[16][8];
|
||||||
static const GSVector8 m_log2_coef[4];
|
static GSVector8 m_log2_coef[4];
|
||||||
#else
|
#else
|
||||||
static const GSVector4i m_test[8];
|
static GSVector4i m_test[8];
|
||||||
static const GSVector4 m_log2_coef[4];
|
static GSVector4 m_log2_coef[4];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
};
|
};
|
||||||
|
|
|
@ -74,6 +74,13 @@ typedef struct
|
||||||
|
|
||||||
#pragma pack(pop)
|
#pragma pack(pop)
|
||||||
|
|
||||||
|
static GSVector4 GSRendererCL::m_pos_scale;
|
||||||
|
|
||||||
|
void GSRendererCL::InitVectors()
|
||||||
|
{
|
||||||
|
m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
|
||||||
|
}
|
||||||
|
|
||||||
GSRendererCL::GSRendererCL()
|
GSRendererCL::GSRendererCL()
|
||||||
: m_vb_count(0)
|
: m_vb_count(0)
|
||||||
, m_synced(true)
|
, m_synced(true)
|
||||||
|
@ -200,8 +207,6 @@ GSTexture* GSRendererCL::GetOutput(int i, int& y_offset)
|
||||||
return m_texture[i];
|
return m_texture[i];
|
||||||
}
|
}
|
||||||
|
|
||||||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
|
|
||||||
|
|
||||||
template<uint32 primclass, uint32 tme, uint32 fst>
|
template<uint32 primclass, uint32 tme, uint32 fst>
|
||||||
void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
||||||
{
|
{
|
||||||
|
@ -214,7 +219,7 @@ void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex*
|
||||||
|
|
||||||
GSVector4i xyzuvf(src->m[1]);
|
GSVector4i xyzuvf(src->m[1]);
|
||||||
|
|
||||||
dst->p = (GSVector4(xyzuvf.upl16() - o) * g_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
|
dst->p = (GSVector4(xyzuvf.upl16() - o) * m_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
|
||||||
|
|
||||||
GSVector4 t = GSVector4::zero();
|
GSVector4 t = GSVector4::zero();
|
||||||
|
|
||||||
|
|
|
@ -32,6 +32,8 @@ struct alignas(32) GSVertexCL
|
||||||
|
|
||||||
class GSRendererCL : public GSRenderer
|
class GSRendererCL : public GSRenderer
|
||||||
{
|
{
|
||||||
|
static GSVector4 m_pos_scale;
|
||||||
|
|
||||||
typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||||
|
|
||||||
ConvertVertexBufferPtr m_cvb[4][2][2];
|
ConvertVertexBufferPtr m_cvb[4][2][2];
|
||||||
|
@ -261,6 +263,8 @@ protected:
|
||||||
bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);
|
bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
GSRendererCL();
|
GSRendererCL();
|
||||||
virtual ~GSRendererCL();
|
virtual ~GSRendererCL();
|
||||||
};
|
};
|
||||||
|
|
|
@ -26,11 +26,19 @@
|
||||||
|
|
||||||
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
||||||
|
|
||||||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
GSVector4 GSRendererSW::m_pos_scale;
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8 GSRendererSW::m_pos_scale2;
|
||||||
|
#endif
|
||||||
|
|
||||||
|
void GSRendererSW::InitVectors()
|
||||||
|
{
|
||||||
|
m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
const GSVector8 g_pos_scale2(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
m_pos_scale2 = GSVector8(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
GSRendererSW::GSRendererSW(int threads)
|
GSRendererSW::GSRendererSW(int threads)
|
||||||
: m_fzb(NULL)
|
: m_fzb(NULL)
|
||||||
|
@ -294,7 +302,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
|
||||||
GSVector8i xy = xyzuvf.upl16() - o2;
|
GSVector8i xy = xyzuvf.upl16() - o2;
|
||||||
GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
|
GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
|
||||||
|
|
||||||
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * g_pos_scale2;
|
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * m_pos_scale2;
|
||||||
GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7);
|
GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7);
|
||||||
|
|
||||||
GSVector8 t = GSVector8::zero();
|
GSVector8 t = GSVector8::zero();
|
||||||
|
@ -364,7 +372,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * m_pos_scale;
|
||||||
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
|
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
|
||||||
|
|
||||||
GSVector4 t = GSVector4::zero();
|
GSVector4 t = GSVector4::zero();
|
||||||
|
|
|
@ -27,6 +27,11 @@
|
||||||
|
|
||||||
class GSRendererSW : public GSRenderer
|
class GSRendererSW : public GSRenderer
|
||||||
{
|
{
|
||||||
|
static GSVector4 m_pos_scale;
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
static GSVector8 m_pos_scale2;
|
||||||
|
#endif
|
||||||
|
|
||||||
class SharedData : public GSDrawScanline::SharedData
|
class SharedData : public GSDrawScanline::SharedData
|
||||||
{
|
{
|
||||||
struct alignas(16) TextureLevel
|
struct alignas(16) TextureLevel
|
||||||
|
@ -95,6 +100,8 @@ protected:
|
||||||
bool GetScanlineGlobalData(SharedData* data);
|
bool GetScanlineGlobalData(SharedData* data);
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
GSRendererSW(int threads);
|
GSRendererSW(int threads);
|
||||||
virtual ~GSRendererSW();
|
virtual ~GSRendererSW();
|
||||||
};
|
};
|
||||||
|
|
|
@ -23,8 +23,15 @@
|
||||||
#include "GSSetupPrimCodeGenerator.h"
|
#include "GSSetupPrimCodeGenerator.h"
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8 GSSetupPrimCodeGenerator::m_shift[9];
|
||||||
|
#else
|
||||||
|
GSVector4 GSSetupPrimCodeGenerator::m_shift[5];
|
||||||
|
#endif
|
||||||
|
|
||||||
const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] =
|
void GSSetupPrimCodeGenerator::InitVectors()
|
||||||
|
{
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8 shift[9] =
|
||||||
{
|
{
|
||||||
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
|
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
|
||||||
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
|
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
|
||||||
|
@ -37,9 +44,11 @@ const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] =
|
||||||
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
|
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
#else
|
for (size_t n = 0; n < countof(shift); ++n)
|
||||||
|
m_shift[n] = shift[n];
|
||||||
|
|
||||||
const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
|
#else
|
||||||
|
GSVector4 shift[5] =
|
||||||
{
|
{
|
||||||
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
|
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
|
||||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||||
|
@ -48,7 +57,10 @@ const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
|
||||||
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
|
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
for (size_t n = 0; n < countof(shift); ++n)
|
||||||
|
m_shift[n] = shift[n];
|
||||||
#endif
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||||
: GSCodeGenerator(code, maxsize)
|
: GSCodeGenerator(code, maxsize)
|
||||||
|
|
|
@ -43,8 +43,10 @@ public:
|
||||||
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
static const GSVector8 m_shift[9];
|
static GSVector8 m_shift[9];
|
||||||
#else
|
#else
|
||||||
static const GSVector4 m_shift[5];
|
static GSVector4 m_shift[5];
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
};
|
};
|
||||||
|
|
|
@ -35,58 +35,60 @@
|
||||||
|
|
||||||
const char* GSUtil::GetLibName()
|
const char* GSUtil::GetLibName()
|
||||||
{
|
{
|
||||||
// TODO: critsec
|
// The following ifdef mess is courtesy of "static string str;"
|
||||||
|
// being optimised by GCC to be unusable by older CPUs. Enjoy!
|
||||||
|
static char name[255];
|
||||||
|
|
||||||
static string str;
|
snprintf(name, sizeof(name), "GSdx "
|
||||||
|
|
||||||
if(str.empty())
|
|
||||||
{
|
|
||||||
str = "GSdx";
|
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
str += format(" %lld", SVN_REV);
|
"%lld "
|
||||||
if(SVN_MODS) str += "m";
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef _M_AMD64
|
#ifdef _M_AMD64
|
||||||
str += " 64-bit";
|
"64-bit "
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
list<string> sl;
|
|
||||||
|
|
||||||
#ifdef __INTEL_COMPILER
|
#ifdef __INTEL_COMPILER
|
||||||
sl.push_back(format("Intel C++ %d.%02d", __INTEL_COMPILER / 100, __INTEL_COMPILER % 100));
|
"(Intel C++ %d.%02d %s)",
|
||||||
#elif _MSC_VER
|
#elif _MSC_VER
|
||||||
sl.push_back(format("MSVC %d.%02d", _MSC_VER / 100, _MSC_VER % 100));
|
"(MSVC %d.%02d %s)",
|
||||||
|
#elif __clang__
|
||||||
|
"(clang %d.%d.%d %s)",
|
||||||
#elif __GNUC__
|
#elif __GNUC__
|
||||||
sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__));
|
"(GCC %d.%d.%d %s)",
|
||||||
|
#else
|
||||||
|
"(%s)",
|
||||||
|
#endif
|
||||||
|
#ifdef _WIN32
|
||||||
|
SVN_REV,
|
||||||
|
#endif
|
||||||
|
#ifdef __INTEL_COMPILER
|
||||||
|
__INTEL_COMPILER / 100, __INTEL_COMPILER % 100,
|
||||||
|
#elif _MSC_VER
|
||||||
|
_MSC_VER / 100, _MSC_VER % 100,
|
||||||
|
#elif __clang__
|
||||||
|
__clang_major__, __clang_minor__, __clang_patchlevel__,
|
||||||
|
#elif __GNUC__
|
||||||
|
__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
sl.push_back("AVX2");
|
"AVX2"
|
||||||
#elif _M_SSE >= 0x500
|
#elif _M_SSE >= 0x500
|
||||||
sl.push_back("AVX");
|
"AVX"
|
||||||
#elif _M_SSE >= 0x402
|
#elif _M_SSE >= 0x402
|
||||||
sl.push_back("SSE42");
|
"SSE4.2"
|
||||||
#elif _M_SSE >= 0x401
|
#elif _M_SSE >= 0x401
|
||||||
sl.push_back("SSE41");
|
"SSE4.1"
|
||||||
#elif _M_SSE >= 0x301
|
#elif _M_SSE >= 0x301
|
||||||
sl.push_back("SSSE3");
|
"SSSE3"
|
||||||
#elif _M_SSE >= 0x200
|
#elif _M_SSE >= 0x200
|
||||||
sl.push_back("SSE2");
|
"SSE2"
|
||||||
#elif _M_SSE >= 0x100
|
#elif _M_SSE >= 0x100
|
||||||
sl.push_back("SSE");
|
"SSE"
|
||||||
#endif
|
#endif
|
||||||
|
);
|
||||||
|
|
||||||
for(list<string>::iterator i = sl.begin(); i != sl.end(); )
|
return name;
|
||||||
{
|
|
||||||
if(i == sl.begin()) str += " (";
|
|
||||||
str += *i;
|
|
||||||
str += ++i != sl.end() ? ", " : ")";
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return str.c_str();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
static class GSUtilMaps
|
static class GSUtilMaps
|
||||||
|
@ -203,22 +205,31 @@ bool GSUtil::CheckSSE()
|
||||||
{
|
{
|
||||||
Xbyak::util::Cpu cpu;
|
Xbyak::util::Cpu cpu;
|
||||||
Xbyak::util::Cpu::Type type;
|
Xbyak::util::Cpu::Type type;
|
||||||
|
const char* instruction_set = "";
|
||||||
|
|
||||||
#if _M_SSE >= 0x500
|
#if _M_SSE >= 0x501
|
||||||
|
type = Xbyak::util::Cpu::tAVX2;
|
||||||
|
instruction_set = "AVX2";
|
||||||
|
#elif _M_SSE >= 0x500
|
||||||
type = Xbyak::util::Cpu::tAVX;
|
type = Xbyak::util::Cpu::tAVX;
|
||||||
|
instruction_set = "AVX";
|
||||||
#elif _M_SSE >= 0x402
|
#elif _M_SSE >= 0x402
|
||||||
type = Xbyak::util::Cpu::tSSE42;
|
type = Xbyak::util::Cpu::tSSE42;
|
||||||
|
instruction_set = "SSE4.2";
|
||||||
#elif _M_SSE >= 0x401
|
#elif _M_SSE >= 0x401
|
||||||
type = Xbyak::util::Cpu::tSSE41;
|
type = Xbyak::util::Cpu::tSSE41;
|
||||||
|
instruction_set = "SSE4.1";
|
||||||
#elif _M_SSE >= 0x301
|
#elif _M_SSE >= 0x301
|
||||||
type = Xbyak::util::Cpu::tSSSE3;
|
type = Xbyak::util::Cpu::tSSSE3;
|
||||||
|
instruction_set = "SSSE3";
|
||||||
#elif _M_SSE >= 0x200
|
#elif _M_SSE >= 0x200
|
||||||
type = Xbyak::util::Cpu::tSSE2;
|
type = Xbyak::util::Cpu::tSSE2;
|
||||||
|
instruction_set = "SSE2";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if(!cpu.has(type))
|
if(!cpu.has(type))
|
||||||
{
|
{
|
||||||
fprintf(stderr, "This CPU does not support SSE %d.%02d", _M_SSE >> 8, _M_SSE & 0xff);
|
fprintf(stderr, "This CPU does not support %s\n", instruction_set);
|
||||||
|
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,12 @@
|
||||||
#include "stdafx.h"
|
#include "stdafx.h"
|
||||||
#include "GSVector.h"
|
#include "GSVector.h"
|
||||||
|
|
||||||
const GSVector4i GSVector4i::m_xff[17] =
|
GSVector4i GSVector4i::m_xff[17];
|
||||||
|
GSVector4i GSVector4i::m_x0f[17];
|
||||||
|
|
||||||
|
void GSVector4i::InitVectors()
|
||||||
|
{
|
||||||
|
GSVector4i xff[17] =
|
||||||
{
|
{
|
||||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||||
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||||
|
@ -43,7 +48,7 @@ const GSVector4i GSVector4i::m_xff[17] =
|
||||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||||
};
|
};
|
||||||
|
|
||||||
const GSVector4i GSVector4i::m_x0f[17] =
|
GSVector4i x0f[17] =
|
||||||
{
|
{
|
||||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||||
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||||
|
@ -64,33 +69,70 @@ const GSVector4i GSVector4i::m_x0f[17] =
|
||||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
|
for (size_t n = 0; n < countof(xff); ++n)
|
||||||
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
m_xff[n] = xff[n];
|
||||||
const GSVector4 GSVector4::m_half(0.5f);
|
|
||||||
const GSVector4 GSVector4::m_one(1.0f);
|
for (size_t n = 0; n < countof(x0f); ++n)
|
||||||
const GSVector4 GSVector4::m_two(2.0f);
|
m_x0f[n] = x0f[n];
|
||||||
const GSVector4 GSVector4::m_four(4.0f);
|
}
|
||||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
|
||||||
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
|
GSVector4 GSVector4::m_ps0123;
|
||||||
const GSVector4 GSVector4::m_max(FLT_MAX);
|
GSVector4 GSVector4::m_ps4567;
|
||||||
const GSVector4 GSVector4::m_min(FLT_MIN);
|
GSVector4 GSVector4::m_half;
|
||||||
|
GSVector4 GSVector4::m_one;
|
||||||
|
GSVector4 GSVector4::m_two;
|
||||||
|
GSVector4 GSVector4::m_four;
|
||||||
|
GSVector4 GSVector4::m_x4b000000;
|
||||||
|
GSVector4 GSVector4::m_x4f800000;
|
||||||
|
GSVector4 GSVector4::m_max;
|
||||||
|
GSVector4 GSVector4::m_min;
|
||||||
|
|
||||||
|
void GSVector4::InitVectors()
|
||||||
|
{
|
||||||
|
m_ps0123 = GSVector4(0.0f, 1.0f, 2.0f, 3.0f);
|
||||||
|
m_ps4567 = GSVector4(4.0f, 5.0f, 6.0f, 7.0f);
|
||||||
|
m_half = GSVector4(0.5f);
|
||||||
|
m_one = GSVector4(1.0f);
|
||||||
|
m_two = GSVector4(2.0f);
|
||||||
|
m_four = GSVector4(4.0f);
|
||||||
|
m_x4b000000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||||
|
m_x4f800000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
|
||||||
|
m_max = GSVector4(FLT_MAX);
|
||||||
|
m_min = GSVector4(FLT_MIN);
|
||||||
|
}
|
||||||
|
|
||||||
#if _M_SSE >= 0x500
|
#if _M_SSE >= 0x500
|
||||||
|
|
||||||
const GSVector8 GSVector8::m_half(0.5f);
|
GSVector8 GSVector8::m_half;
|
||||||
const GSVector8 GSVector8::m_one(1.0f);
|
GSVector8 GSVector8::m_one;
|
||||||
const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
|
GSVector8 GSVector8::m_x7fffffff;
|
||||||
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
|
GSVector8 GSVector8::m_x80000000;
|
||||||
const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
|
GSVector8 GSVector8::m_x4b000000;
|
||||||
const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
|
GSVector8 GSVector8::m_x4f800000;
|
||||||
const GSVector8 GSVector8::m_max(FLT_MAX);
|
GSVector8 GSVector8::m_max;
|
||||||
const GSVector8 GSVector8::m_min(FLT_MIN);
|
GSVector8 GSVector8::m_min;
|
||||||
|
|
||||||
|
void GSVector8::InitVectors()
|
||||||
|
{
|
||||||
|
m_half = GSVector8(0.5f);
|
||||||
|
m_one = GSVector8(1.0f);
|
||||||
|
m_x7fffffff = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
|
||||||
|
m_x80000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
|
||||||
|
m_x4b000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
|
||||||
|
m_x4f800000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
|
||||||
|
m_max = GSVector8(FLT_MAX);
|
||||||
|
m_min = GSVector8(FLT_MIN);
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if _M_SSE >= 0x501
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8i GSVector8i::m_xff[33];
|
||||||
|
GSVector8i GSVector8i::m_x0f[33];
|
||||||
|
|
||||||
const GSVector8i GSVector8i::m_xff[33] =
|
void GSVector8i::InitVectors()
|
||||||
|
{
|
||||||
|
GSVector8i xff[33] =
|
||||||
{
|
{
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||||
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||||
|
@ -127,7 +169,7 @@ const GSVector8i GSVector8i::m_xff[33] =
|
||||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||||
};
|
};
|
||||||
|
|
||||||
const GSVector8i GSVector8i::m_x0f[33] =
|
GSVector8i x0f[33] =
|
||||||
{
|
{
|
||||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||||
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||||
|
@ -164,6 +206,12 @@ const GSVector8i GSVector8i::m_x0f[33] =
|
||||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
for (size_t n = 0; n < countof(xff); ++n)
|
||||||
|
m_xff[n] = xff[n];
|
||||||
|
|
||||||
|
for (size_t n = 0; n < countof(x0f); ++n)
|
||||||
|
m_x0f[n] = x0f[n];
|
||||||
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||||
|
|
|
@ -92,8 +92,8 @@ class GSVector8i;
|
||||||
|
|
||||||
class alignas(16) GSVector4i
|
class alignas(16) GSVector4i
|
||||||
{
|
{
|
||||||
static const GSVector4i m_xff[17];
|
static GSVector4i m_xff[17];
|
||||||
static const GSVector4i m_x0f[17];
|
static GSVector4i m_x0f[17];
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union
|
union
|
||||||
|
@ -114,6 +114,8 @@ public:
|
||||||
__m128i m;
|
__m128i m;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
__forceinline GSVector4i()
|
__forceinline GSVector4i()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
@ -2442,16 +2444,18 @@ public:
|
||||||
__m128 m;
|
__m128 m;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const GSVector4 m_ps0123;
|
static GSVector4 m_ps0123;
|
||||||
static const GSVector4 m_ps4567;
|
static GSVector4 m_ps4567;
|
||||||
static const GSVector4 m_half;
|
static GSVector4 m_half;
|
||||||
static const GSVector4 m_one;
|
static GSVector4 m_one;
|
||||||
static const GSVector4 m_two;
|
static GSVector4 m_two;
|
||||||
static const GSVector4 m_four;
|
static GSVector4 m_four;
|
||||||
static const GSVector4 m_x4b000000;
|
static GSVector4 m_x4b000000;
|
||||||
static const GSVector4 m_x4f800000;
|
static GSVector4 m_x4f800000;
|
||||||
static const GSVector4 m_max;
|
static GSVector4 m_max;
|
||||||
static const GSVector4 m_min;
|
static GSVector4 m_min;
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
__forceinline GSVector4()
|
__forceinline GSVector4()
|
||||||
{
|
{
|
||||||
|
@ -3343,8 +3347,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
|
||||||
|
|
||||||
class alignas(32) GSVector8i
|
class alignas(32) GSVector8i
|
||||||
{
|
{
|
||||||
static const GSVector8i m_xff[33];
|
static GSVector8i m_xff[33];
|
||||||
static const GSVector8i m_x0f[33];
|
static GSVector8i m_x0f[33];
|
||||||
|
|
||||||
public:
|
public:
|
||||||
union
|
union
|
||||||
|
@ -3365,6 +3369,8 @@ public:
|
||||||
__m128i m0, m1;
|
__m128i m0, m1;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
__forceinline GSVector8i() {}
|
__forceinline GSVector8i() {}
|
||||||
|
|
||||||
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
|
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
|
||||||
|
@ -5154,14 +5160,16 @@ public:
|
||||||
__m128 m0, m1;
|
__m128 m0, m1;
|
||||||
};
|
};
|
||||||
|
|
||||||
static const GSVector8 m_half;
|
static GSVector8 m_half;
|
||||||
static const GSVector8 m_one;
|
static GSVector8 m_one;
|
||||||
static const GSVector8 m_x7fffffff;
|
static GSVector8 m_x7fffffff;
|
||||||
static const GSVector8 m_x80000000;
|
static GSVector8 m_x80000000;
|
||||||
static const GSVector8 m_x4b000000;
|
static GSVector8 m_x4b000000;
|
||||||
static const GSVector8 m_x4f800000;
|
static GSVector8 m_x4f800000;
|
||||||
static const GSVector8 m_max;
|
static GSVector8 m_max;
|
||||||
static const GSVector8 m_min;
|
static GSVector8 m_min;
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
__forceinline GSVector8()
|
__forceinline GSVector8()
|
||||||
{
|
{
|
||||||
|
|
|
@ -24,7 +24,12 @@
|
||||||
#include "GSUtil.h"
|
#include "GSUtil.h"
|
||||||
#include "GSState.h"
|
#include "GSState.h"
|
||||||
|
|
||||||
const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
|
GSVector4 GSVertexTrace::s_minmax;
|
||||||
|
|
||||||
|
void GSVertexTrace::InitVectors()
|
||||||
|
{
|
||||||
|
s_minmax = GSVector4(FLT_MAX, -FLT_MAX);
|
||||||
|
}
|
||||||
|
|
||||||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||||
: m_state(state)
|
: m_state(state)
|
||||||
|
|
|
@ -38,7 +38,7 @@ public:
|
||||||
protected:
|
protected:
|
||||||
const GSState* m_state;
|
const GSState* m_state;
|
||||||
|
|
||||||
static const GSVector4 s_minmax;
|
static GSVector4 s_minmax;
|
||||||
|
|
||||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
||||||
|
|
||||||
|
@ -69,6 +69,8 @@ public:
|
||||||
GSVector2 m_lod; // x = min, y = max
|
GSVector2 m_lod; // x = min, y = max
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
GSVertexTrace(const GSState* state);
|
GSVertexTrace(const GSState* state);
|
||||||
virtual ~GSVertexTrace() {}
|
virtual ~GSVertexTrace() {}
|
||||||
|
|
||||||
|
|
|
@ -127,6 +127,21 @@ GSdxApp theApp;
|
||||||
|
|
||||||
GSdxApp::GSdxApp()
|
GSdxApp::GSdxApp()
|
||||||
{
|
{
|
||||||
|
// Empty constructor causes an illegal instruction exception on an SSE4.2 machine on Windows.
|
||||||
|
// Non-empty doesn't, but raises a SIGILL signal when compiled against GCC 6.1.1.
|
||||||
|
// So here's a compromise.
|
||||||
|
#ifdef _WIN32
|
||||||
|
Init();
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
void GSdxApp::Init()
|
||||||
|
{
|
||||||
|
static bool is_initialised = false;
|
||||||
|
if (is_initialised)
|
||||||
|
return;
|
||||||
|
is_initialised = true;
|
||||||
|
|
||||||
m_ini = "inis/GSdx.ini";
|
m_ini = "inis/GSdx.ini";
|
||||||
m_section = "Settings";
|
m_section = "Settings";
|
||||||
|
|
||||||
|
|
|
@ -35,6 +35,7 @@ class GSdxApp
|
||||||
public:
|
public:
|
||||||
GSdxApp();
|
GSdxApp();
|
||||||
|
|
||||||
|
void Init();
|
||||||
void* GetModuleHandlePtr();
|
void* GetModuleHandlePtr();
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
|
@ -57,8 +57,33 @@ EXPORT_C_(uint32) PSEgetLibVersion()
|
||||||
return version << 16 | revision << 8 | PLUGIN_VERSION;
|
return version << 16 | revision << 8 | PLUGIN_VERSION;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void InitVectors()
|
||||||
|
{
|
||||||
|
GSVector4i::InitVectors();
|
||||||
|
GSVector4::InitVectors();
|
||||||
|
#if _M_SSE >= 0x500
|
||||||
|
GSVector8::InitVectors();
|
||||||
|
#endif
|
||||||
|
#if _M_SSE >= 0x501
|
||||||
|
GSVector8i::InitVectors();
|
||||||
|
#endif
|
||||||
|
|
||||||
|
GPUDrawScanlineCodeGenerator::InitVectors();
|
||||||
|
GPULocalMemory::InitVectors();
|
||||||
|
GPUSetupPrimCodeGenerator::InitVectors();
|
||||||
|
}
|
||||||
|
|
||||||
EXPORT_C_(int32) GPUinit()
|
EXPORT_C_(int32) GPUinit()
|
||||||
{
|
{
|
||||||
|
if(!GSUtil::CheckSSE())
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
theApp.Init();
|
||||||
|
|
||||||
|
InitVectors();
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -140,6 +165,13 @@ EXPORT_C_(int32) GPUopen(void* hWnd)
|
||||||
|
|
||||||
EXPORT_C_(int32) GPUconfigure()
|
EXPORT_C_(int32) GPUconfigure()
|
||||||
{
|
{
|
||||||
|
if(!GSUtil::CheckSSE())
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
theApp.Init();
|
||||||
|
|
||||||
#ifdef _WIN32
|
#ifdef _WIN32
|
||||||
|
|
||||||
GPUSettingsDlg dlg;
|
GPUSettingsDlg dlg;
|
||||||
|
@ -160,6 +192,11 @@ EXPORT_C_(int32) GPUconfigure()
|
||||||
|
|
||||||
EXPORT_C_(int32) GPUtest()
|
EXPORT_C_(int32) GPUtest()
|
||||||
{
|
{
|
||||||
|
if(!GSUtil::CheckSSE())
|
||||||
|
{
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -1010,7 +1010,19 @@ void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm&
|
||||||
movdqa(a, b);
|
movdqa(a, b);
|
||||||
}
|
}
|
||||||
|
|
||||||
const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
|
|
||||||
|
GSVector4i GPUDrawScanlineCodeGenerator::m_test[8];
|
||||||
|
alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
|
||||||
|
{
|
||||||
|
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
|
||||||
|
{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
|
||||||
|
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
|
||||||
|
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
|
||||||
|
};
|
||||||
|
|
||||||
|
void GPUDrawScanlineCodeGenerator::InitVectors()
|
||||||
|
{
|
||||||
|
GSVector4i test[8] =
|
||||||
{
|
{
|
||||||
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
|
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||||
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||||
|
@ -1022,10 +1034,6 @@ const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
|
||||||
GSVector4i::zero(),
|
GSVector4i::zero(),
|
||||||
};
|
};
|
||||||
|
|
||||||
alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
|
for (size_t n = 0; n < countof(test); ++n)
|
||||||
{
|
m_test[n] = test[n];
|
||||||
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
|
}
|
||||||
{2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4, 2, 5, 3, 4},
|
|
||||||
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
|
|
||||||
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
|
|
||||||
};
|
|
||||||
|
|
|
@ -55,6 +55,8 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
|
||||||
public:
|
public:
|
||||||
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||||
|
|
||||||
static const GSVector4i m_test[8];
|
static GSVector4i m_test[8];
|
||||||
alignas(32) static const uint16 m_dither[4][16];
|
alignas(32) static const uint16 m_dither[4][16];
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
};
|
};
|
||||||
|
|
|
@ -23,15 +23,23 @@
|
||||||
#include "GPULocalMemory.h"
|
#include "GPULocalMemory.h"
|
||||||
#include "GSdx.h"
|
#include "GSdx.h"
|
||||||
|
|
||||||
const GSVector4i GPULocalMemory::m_xxxa(0x00008000);
|
GSVector4i GPULocalMemory::m_xxxa;
|
||||||
const GSVector4i GPULocalMemory::m_xxbx(0x00007c00);
|
GSVector4i GPULocalMemory::m_xxbx;
|
||||||
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0);
|
GSVector4i GPULocalMemory::m_xgxx;
|
||||||
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f);
|
GSVector4i GPULocalMemory::m_rxxx;
|
||||||
|
|
||||||
#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16))
|
#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16))
|
||||||
#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2)
|
#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2)
|
||||||
#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32)
|
#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32)
|
||||||
|
|
||||||
|
void GPULocalMemory::InitVectors()
|
||||||
|
{
|
||||||
|
m_xxxa = GSVector4i(0x00008000);
|
||||||
|
m_xxbx = GSVector4i(0x00007c00);
|
||||||
|
m_xgxx = GSVector4i(0x000003e0);
|
||||||
|
m_rxxx = GSVector4i(0x0000001f);
|
||||||
|
}
|
||||||
|
|
||||||
GPULocalMemory::GPULocalMemory()
|
GPULocalMemory::GPULocalMemory()
|
||||||
{
|
{
|
||||||
m_scale.x = std::min<int>(std::max<int>(theApp.GetConfigI("scale_x"), 0), 2);
|
m_scale.x = std::min<int>(std::max<int>(theApp.GetConfigI("scale_x"), 0), 2);
|
||||||
|
|
|
@ -26,10 +26,10 @@
|
||||||
|
|
||||||
class GPULocalMemory
|
class GPULocalMemory
|
||||||
{
|
{
|
||||||
static const GSVector4i m_xxxa;
|
static GSVector4i m_xxxa;
|
||||||
static const GSVector4i m_xxbx;
|
static GSVector4i m_xxbx;
|
||||||
static const GSVector4i m_xgxx;
|
static GSVector4i m_xgxx;
|
||||||
static const GSVector4i m_rxxx;
|
static GSVector4i m_rxxx;
|
||||||
|
|
||||||
uint16* m_vm;
|
uint16* m_vm;
|
||||||
|
|
||||||
|
@ -50,6 +50,8 @@ class GPULocalMemory
|
||||||
GSVector2i m_scale;
|
GSVector2i m_scale;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
|
static void InitVectors();
|
||||||
|
|
||||||
GPULocalMemory();
|
GPULocalMemory();
|
||||||
virtual ~GPULocalMemory();
|
virtual ~GPULocalMemory();
|
||||||
|
|
||||||
|
|
|
@ -220,9 +220,17 @@ void GPUSetupPrimCodeGenerator::Generate()
|
||||||
ret();
|
ret();
|
||||||
}
|
}
|
||||||
|
|
||||||
const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] =
|
GSVector4 GPUSetupPrimCodeGenerator::m_shift[3];
|
||||||
|
|
||||||
|
void GPUSetupPrimCodeGenerator::InitVectors()
|
||||||
|
{
|
||||||
|
GSVector4 shift[3] =
|
||||||
{
|
{
|
||||||
GSVector4(8.0f, 8.0f, 8.0f, 8.0f),
|
GSVector4(8.0f, 8.0f, 8.0f, 8.0f),
|
||||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||||
GSVector4(4.0f, 5.0f, 6.0f, 7.0f),
|
GSVector4(4.0f, 5.0f, 6.0f, 7.0f),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
for (size_t n = 0; n < countof(shift); ++n)
|
||||||
|
m_shift[n] = shift[n];
|
||||||
|
}
|
||||||
|
|
|
@ -36,5 +36,7 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
|
||||||
public:
|
public:
|
||||||
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||||
|
|
||||||
static const GSVector4 m_shift[3];
|
static GSVector4 m_shift[3];
|
||||||
|
|
||||||
|
static void InitVectors();
|
||||||
};
|
};
|
Loading…
Reference in New Issue