mirror of https://github.com/PCSX2/pcsx2.git
Merge pull request #1477 from turtleli/gsdx-defer-init
gsdx: Avoid illegal instruction crash on older CPUs
This commit is contained in:
commit
f978f9a07d
|
@ -123,6 +123,30 @@ EXPORT_C_(int) GSinit()
|
|||
return -1;
|
||||
}
|
||||
|
||||
// Vector instructions must be avoided when initialising GSdx since PCSX2
|
||||
// can crash if the CPU does not support the instruction set.
|
||||
// Initialise it here instead - it's not ideal since we have to strip the
|
||||
// const type qualifier from all the affected variables.
|
||||
theApp.Init();
|
||||
|
||||
GSBlock::InitVectors();
|
||||
GSClut::InitVectors();
|
||||
GSDrawScanlineCodeGenerator::InitVectors();
|
||||
#ifdef ENABLE_OPENCL
|
||||
GSRendererCL::InitVectors();
|
||||
#endif
|
||||
GSRendererSW::InitVectors();
|
||||
GSSetupPrimCodeGenerator::InitVectors();
|
||||
GSVector4i::InitVectors();
|
||||
GSVector4::InitVectors();
|
||||
#if _M_SSE >= 0x500
|
||||
GSVector8::InitVectors();
|
||||
#endif
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8i::InitVectors();
|
||||
#endif
|
||||
GSVertexTrace::InitVectors();
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
s_hr = ::CoInitializeEx(NULL, COINIT_MULTITHREADED);
|
||||
|
@ -793,6 +817,8 @@ EXPORT_C GSconfigure()
|
|||
{
|
||||
if(!GSUtil::CheckSSE()) return;
|
||||
|
||||
theApp.Init();
|
||||
|
||||
#ifdef _WIN32
|
||||
GSDialog::InitCommonControls();
|
||||
if(GSSettingsDlg().DoModal() == IDOK)
|
||||
|
|
|
@ -23,26 +23,54 @@
|
|||
#include "GSBlock.h"
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
const GSVector8i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||
GSVector8i GSBlock::m_r16mask;
|
||||
#else
|
||||
const GSVector4i GSBlock::m_r16mask(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||
GSVector4i GSBlock::m_r16mask;
|
||||
#endif
|
||||
const GSVector4i GSBlock::m_r8mask(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
|
||||
const GSVector4i GSBlock::m_r4mask(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||
GSVector4i GSBlock::m_r8mask;
|
||||
GSVector4i GSBlock::m_r4mask;
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
const GSVector8i GSBlock::m_xxxa(0x00008000);
|
||||
const GSVector8i GSBlock::m_xxbx(0x00007c00);
|
||||
const GSVector8i GSBlock::m_xgxx(0x000003e0);
|
||||
const GSVector8i GSBlock::m_rxxx(0x0000001f);
|
||||
GSVector8i GSBlock::m_xxxa;
|
||||
GSVector8i GSBlock::m_xxbx;
|
||||
GSVector8i GSBlock::m_xgxx;
|
||||
GSVector8i GSBlock::m_rxxx;
|
||||
#else
|
||||
const GSVector4i GSBlock::m_xxxa(0x00008000);
|
||||
const GSVector4i GSBlock::m_xxbx(0x00007c00);
|
||||
const GSVector4i GSBlock::m_xgxx(0x000003e0);
|
||||
const GSVector4i GSBlock::m_rxxx(0x0000001f);
|
||||
GSVector4i GSBlock::m_xxxa;
|
||||
GSVector4i GSBlock::m_xxbx;
|
||||
GSVector4i GSBlock::m_xgxx;
|
||||
GSVector4i GSBlock::m_rxxx;
|
||||
#endif
|
||||
|
||||
const GSVector4i GSBlock::m_uw8hmask0(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
|
||||
const GSVector4i GSBlock::m_uw8hmask1(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
|
||||
const GSVector4i GSBlock::m_uw8hmask2(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
|
||||
const GSVector4i GSBlock::m_uw8hmask3(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
|
||||
GSVector4i GSBlock::m_uw8hmask0;
|
||||
GSVector4i GSBlock::m_uw8hmask1;
|
||||
GSVector4i GSBlock::m_uw8hmask2;
|
||||
GSVector4i GSBlock::m_uw8hmask3;
|
||||
|
||||
void GSBlock::InitVectors()
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
m_r16mask = GSVector8i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15, 0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||
#else
|
||||
m_r16mask = GSVector4i(0, 1, 4, 5, 2, 3, 6, 7, 8, 9, 12, 13, 10, 11, 14, 15);
|
||||
#endif
|
||||
m_r8mask = GSVector4i(0, 4, 2, 6, 8, 12, 10, 14, 1, 5, 3, 7, 9, 13, 11, 15);
|
||||
m_r4mask = GSVector4i(0, 1, 4, 5, 8, 9, 12, 13, 2, 3, 6, 7, 10, 11, 14, 15);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
m_xxxa = GSVector8i(0x00008000);
|
||||
m_xxbx = GSVector8i(0x00007c00);
|
||||
m_xgxx = GSVector8i(0x000003e0);
|
||||
m_rxxx = GSVector8i(0x0000001f);
|
||||
#else
|
||||
m_xxxa = GSVector4i(0x00008000);
|
||||
m_xxbx = GSVector4i(0x00007c00);
|
||||
m_xgxx = GSVector4i(0x000003e0);
|
||||
m_rxxx = GSVector4i(0x0000001f);
|
||||
#endif
|
||||
|
||||
m_uw8hmask0 = GSVector4i(0, 0, 0, 0, 1, 1, 1, 1, 8, 8, 8, 8, 9, 9, 9, 9);
|
||||
m_uw8hmask1 = GSVector4i(2, 2, 2, 2, 3, 3, 3, 3, 10, 10, 10, 10, 11, 11, 11, 11);
|
||||
m_uw8hmask2 = GSVector4i(4, 4, 4, 4, 5, 5, 5, 5, 12, 12, 12, 12, 13, 13, 13, 13);
|
||||
m_uw8hmask3 = GSVector4i(6, 6, 6, 6, 7, 7, 7, 7, 14, 14, 14, 14, 15, 15, 15, 15);
|
||||
}
|
||||
|
|
|
@ -28,31 +28,33 @@
|
|||
class GSBlock
|
||||
{
|
||||
#if _M_SSE >= 0x501
|
||||
static const GSVector8i m_r16mask;
|
||||
static GSVector8i m_r16mask;
|
||||
#else
|
||||
static const GSVector4i m_r16mask;
|
||||
static GSVector4i m_r16mask;
|
||||
#endif
|
||||
static const GSVector4i m_r8mask;
|
||||
static const GSVector4i m_r4mask;
|
||||
static GSVector4i m_r8mask;
|
||||
static GSVector4i m_r4mask;
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
static const GSVector8i m_xxxa;
|
||||
static const GSVector8i m_xxbx;
|
||||
static const GSVector8i m_xgxx;
|
||||
static const GSVector8i m_rxxx;
|
||||
static GSVector8i m_xxxa;
|
||||
static GSVector8i m_xxbx;
|
||||
static GSVector8i m_xgxx;
|
||||
static GSVector8i m_rxxx;
|
||||
#else
|
||||
static const GSVector4i m_xxxa;
|
||||
static const GSVector4i m_xxbx;
|
||||
static const GSVector4i m_xgxx;
|
||||
static const GSVector4i m_rxxx;
|
||||
static GSVector4i m_xxxa;
|
||||
static GSVector4i m_xxbx;
|
||||
static GSVector4i m_xgxx;
|
||||
static GSVector4i m_rxxx;
|
||||
#endif
|
||||
|
||||
static const GSVector4i m_uw8hmask0;
|
||||
static const GSVector4i m_uw8hmask1;
|
||||
static const GSVector4i m_uw8hmask2;
|
||||
static const GSVector4i m_uw8hmask3;
|
||||
static GSVector4i m_uw8hmask0;
|
||||
static GSVector4i m_uw8hmask1;
|
||||
static GSVector4i m_uw8hmask2;
|
||||
static GSVector4i m_uw8hmask3;
|
||||
|
||||
public:
|
||||
static void InitVectors();
|
||||
|
||||
template<int i, int alignment, uint32 mask> __forceinline static void WriteColumn32(uint8* RESTRICT dst, const uint8* RESTRICT src, int srcpitch)
|
||||
{
|
||||
const uint8* RESTRICT s0 = &src[srcpitch * 0];
|
||||
|
|
|
@ -682,17 +682,24 @@ __forceinline void GSClut::ExpandCLUT64_T16(const GSVector4i& hi, const GSVector
|
|||
|
||||
// TODO
|
||||
|
||||
static const GSVector4i s_bm(0x00007c00);
|
||||
static const GSVector4i s_gm(0x000003e0);
|
||||
static const GSVector4i s_rm(0x0000001f);
|
||||
GSVector4i GSClut::m_bm;
|
||||
GSVector4i GSClut::m_gm;
|
||||
GSVector4i GSClut::m_rm;
|
||||
|
||||
void GSClut::InitVectors()
|
||||
{
|
||||
m_bm = GSVector4i(0x00007c00);
|
||||
m_gm = GSVector4i(0x000003e0);
|
||||
m_rm = GSVector4i(0x0000001f);
|
||||
}
|
||||
|
||||
void GSClut::Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA)
|
||||
{
|
||||
ASSERT((w & 7) == 0);
|
||||
|
||||
const GSVector4i rm = s_rm;
|
||||
const GSVector4i gm = s_gm;
|
||||
const GSVector4i bm = s_bm;
|
||||
const GSVector4i rm = m_rm;
|
||||
const GSVector4i gm = m_gm;
|
||||
const GSVector4i bm = m_bm;
|
||||
|
||||
GSVector4i TA0(TEXA.TA0 << 24);
|
||||
GSVector4i TA1(TEXA.TA1 << 24);
|
||||
|
|
|
@ -30,6 +30,10 @@ class GSLocalMemory;
|
|||
|
||||
class alignas(32) GSClut : public GSAlignedClass<32>
|
||||
{
|
||||
static GSVector4i m_bm;
|
||||
static GSVector4i m_gm;
|
||||
static GSVector4i m_rm;
|
||||
|
||||
GSLocalMemory* m_mem;
|
||||
|
||||
uint32 m_CBP[2];
|
||||
|
@ -93,6 +97,8 @@ class alignas(32) GSClut : public GSAlignedClass<32>
|
|||
static void Expand16(const uint16* RESTRICT src, uint32* RESTRICT dst, int w, const GIFRegTEXA& TEXA);
|
||||
|
||||
public:
|
||||
static void InitVectors();
|
||||
|
||||
GSClut(GSLocalMemory* mem);
|
||||
virtual ~GSClut();
|
||||
|
||||
|
|
|
@ -516,7 +516,7 @@ CRC::Game CRC::m_games[] =
|
|||
{0x06A7506A, SacredBlaze, JP, 0},
|
||||
};
|
||||
|
||||
hash_map<uint32, CRC::Game*> CRC::m_map;
|
||||
map<uint32, CRC::Game*> CRC::m_map;
|
||||
|
||||
string ToLower( string str )
|
||||
{
|
||||
|
@ -563,7 +563,7 @@ CRC::Game CRC::Lookup(uint32 crc)
|
|||
printf("[FIXME] GSdx: Duplicate CRC: Overall: %d\n", crcDups);
|
||||
}
|
||||
|
||||
hash_map<uint32, Game*>::iterator i = m_map.find(crc);
|
||||
auto i = m_map.find(crc);
|
||||
|
||||
if(i != m_map.end())
|
||||
{
|
||||
|
|
|
@ -211,7 +211,7 @@ public:
|
|||
|
||||
private:
|
||||
static Game m_games[];
|
||||
static hash_map<uint32, Game*> m_map;
|
||||
static map<uint32, Game*> m_map;
|
||||
|
||||
public:
|
||||
static Game Lookup(uint32 crc);
|
||||
|
|
|
@ -44,37 +44,55 @@ alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] =
|
|||
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00},
|
||||
};
|
||||
|
||||
const GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||
GSVector8 GSDrawScanlineCodeGenerator::m_log2_coef[4];
|
||||
#else
|
||||
GSVector4i GSDrawScanlineCodeGenerator::m_test[8];
|
||||
GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4];
|
||||
#endif
|
||||
|
||||
void GSDrawScanlineCodeGenerator::InitVectors()
|
||||
{
|
||||
GSVector8(0.204446009836232697516f),
|
||||
GSVector8(-1.04913055217340124191f),
|
||||
GSVector8(2.28330284476918490682f),
|
||||
GSVector8(1.0f),
|
||||
};
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8 log2_coef[4] =
|
||||
{
|
||||
GSVector8(0.204446009836232697516f),
|
||||
GSVector8(-1.04913055217340124191f),
|
||||
GSVector8(2.28330284476918490682f),
|
||||
GSVector8(1.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(log2_coef); ++n)
|
||||
m_log2_coef[n] = log2_coef[n];
|
||||
|
||||
#else
|
||||
GSVector4i test[8] =
|
||||
{
|
||||
GSVector4i::zero(),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector4i::zero(),
|
||||
};
|
||||
|
||||
const GSVector4i GSDrawScanlineCodeGenerator::m_test[8] =
|
||||
{
|
||||
GSVector4i::zero(),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector4i::zero(),
|
||||
};
|
||||
GSVector4 log2_coef[4] =
|
||||
{
|
||||
GSVector4(0.204446009836232697516f),
|
||||
GSVector4(-1.04913055217340124191f),
|
||||
GSVector4(2.28330284476918490682f),
|
||||
GSVector4(1.0f),
|
||||
};
|
||||
|
||||
const GSVector4 GSDrawScanlineCodeGenerator::m_log2_coef[4] =
|
||||
{
|
||||
GSVector4(0.204446009836232697516f),
|
||||
GSVector4(-1.04913055217340124191f),
|
||||
GSVector4(2.28330284476918490682f),
|
||||
GSVector4(1.0f),
|
||||
};
|
||||
for (size_t n = 0; n < countof(test); ++n)
|
||||
m_test[n] = test[n];
|
||||
|
||||
for (size_t n = 0; n < countof(log2_coef); ++n)
|
||||
m_log2_coef[n] = log2_coef[n];
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
GSDrawScanlineCodeGenerator::GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
|
|
|
@ -136,10 +136,11 @@ public:
|
|||
|
||||
#if _M_SSE >= 0x501
|
||||
alignas(8) static const uint8 m_test[16][8];
|
||||
static const GSVector8 m_log2_coef[4];
|
||||
static GSVector8 m_log2_coef[4];
|
||||
#else
|
||||
static const GSVector4i m_test[8];
|
||||
static const GSVector4 m_log2_coef[4];
|
||||
static GSVector4i m_test[8];
|
||||
static GSVector4 m_log2_coef[4];
|
||||
#endif
|
||||
|
||||
static void InitVectors();
|
||||
};
|
||||
|
|
|
@ -74,6 +74,13 @@ typedef struct
|
|||
|
||||
#pragma pack(pop)
|
||||
|
||||
// Out-of-class definition of the static data member declared in GSRendererCL.
// The `static` keyword is only allowed on the in-class declaration, so it
// must not be repeated here. The value is assigned in InitVectors().
GSVector4 GSRendererCL::m_pos_scale;
|
||||
|
||||
void GSRendererCL::InitVectors()
|
||||
{
|
||||
m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
|
||||
}
|
||||
|
||||
GSRendererCL::GSRendererCL()
|
||||
: m_vb_count(0)
|
||||
, m_synced(true)
|
||||
|
@ -200,8 +207,6 @@ GSTexture* GSRendererCL::GetOutput(int i, int& y_offset)
|
|||
return m_texture[i];
|
||||
}
|
||||
|
||||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
|
||||
|
||||
template<uint32 primclass, uint32 tme, uint32 fst>
|
||||
void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count)
|
||||
{
|
||||
|
@ -214,7 +219,7 @@ void GSRendererCL::ConvertVertexBuffer(GSVertexCL* RESTRICT dst, const GSVertex*
|
|||
|
||||
GSVector4i xyzuvf(src->m[1]);
|
||||
|
||||
dst->p = (GSVector4(xyzuvf.upl16() - o) * g_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
|
||||
dst->p = (GSVector4(xyzuvf.upl16() - o) * m_pos_scale).xyxy(GSVector4::cast(xyzuvf.ywyw())); // pass zf as uints
|
||||
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
||||
|
|
|
@ -32,6 +32,8 @@ struct alignas(32) GSVertexCL
|
|||
|
||||
class GSRendererCL : public GSRenderer
|
||||
{
|
||||
static GSVector4 m_pos_scale;
|
||||
|
||||
typedef void (GSRendererCL::*ConvertVertexBufferPtr)(GSVertexCL* RESTRICT dst, const GSVertex* RESTRICT src, size_t count);
|
||||
|
||||
ConvertVertexBufferPtr m_cvb[4][2][2];
|
||||
|
@ -261,6 +263,8 @@ protected:
|
|||
bool SetupParameter(TFXJob* job, TFXParameter* pb, GSVertexCL* vertex, size_t vertex_count, const uint32* index, size_t index_count);
|
||||
|
||||
public:
|
||||
static void InitVectors();
|
||||
|
||||
GSRendererCL();
|
||||
virtual ~GSRendererCL();
|
||||
};
|
||||
|
|
|
@ -26,11 +26,19 @@
|
|||
|
||||
static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
||||
|
||||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
GSVector4 GSRendererSW::m_pos_scale;
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8 GSRendererSW::m_pos_scale2;
|
||||
#endif
|
||||
|
||||
void GSRendererSW::InitVectors()
|
||||
{
|
||||
m_pos_scale = GSVector4(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
const GSVector8 g_pos_scale2(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
m_pos_scale2 = GSVector8(1.0f / 16, 1.0f / 16, 1.0f, 128.0f, 1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
#endif
|
||||
}
|
||||
|
||||
GSRendererSW::GSRendererSW(int threads)
|
||||
: m_fzb(NULL)
|
||||
|
@ -294,7 +302,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
|
|||
GSVector8i xy = xyzuvf.upl16() - o2;
|
||||
GSVector8i zf = xyzuvf.ywww().min_u32(GSVector8i::xffffff00());
|
||||
|
||||
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * g_pos_scale2;
|
||||
GSVector8 p = GSVector8(xy).xyxy(GSVector8(zf) + (GSVector8::m_x4f800000 & GSVector8::cast(zf.sra32(31)))) * m_pos_scale2;
|
||||
GSVector8 c = GSVector8(GSVector8i::cast(stcq).uph8().upl16() << 7);
|
||||
|
||||
GSVector8 t = GSVector8::zero();
|
||||
|
@ -364,7 +372,7 @@ void GSRendererSW::ConvertVertexBuffer(GSVertexSW* RESTRICT dst, const GSVertex*
|
|||
|
||||
#endif
|
||||
|
||||
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
dst->p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * m_pos_scale;
|
||||
dst->c = GSVector4(GSVector4i::cast(stcq).zzzz().u8to32() << 7);
|
||||
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
|
|
@ -27,6 +27,11 @@
|
|||
|
||||
class GSRendererSW : public GSRenderer
|
||||
{
|
||||
static GSVector4 m_pos_scale;
|
||||
#if _M_SSE >= 0x501
|
||||
static GSVector8 m_pos_scale2;
|
||||
#endif
|
||||
|
||||
class SharedData : public GSDrawScanline::SharedData
|
||||
{
|
||||
struct alignas(16) TextureLevel
|
||||
|
@ -95,6 +100,8 @@ protected:
|
|||
bool GetScanlineGlobalData(SharedData* data);
|
||||
|
||||
public:
|
||||
static void InitVectors();
|
||||
|
||||
GSRendererSW(int threads);
|
||||
virtual ~GSRendererSW();
|
||||
};
|
||||
|
|
|
@ -23,32 +23,44 @@
|
|||
#include "GSSetupPrimCodeGenerator.h"
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8 GSSetupPrimCodeGenerator::m_shift[9];
|
||||
#else
|
||||
GSVector4 GSSetupPrimCodeGenerator::m_shift[5];
|
||||
#endif
|
||||
|
||||
const GSVector8 GSSetupPrimCodeGenerator::m_shift[9] =
|
||||
void GSSetupPrimCodeGenerator::InitVectors()
|
||||
{
|
||||
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
|
||||
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
|
||||
GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f),
|
||||
GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f),
|
||||
GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f),
|
||||
GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f),
|
||||
GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8 shift[9] =
|
||||
{
|
||||
GSVector8(8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f, 8.0f),
|
||||
GSVector8(0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f),
|
||||
GSVector8(-1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f),
|
||||
GSVector8(-2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f),
|
||||
GSVector8(-3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f, 4.0f),
|
||||
GSVector8(-4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector8(-5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f, 2.0f),
|
||||
GSVector8(-6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector8(-7.0f, -6.0f, -5.0f, -4.0f, -3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(shift); ++n)
|
||||
m_shift[n] = shift[n];
|
||||
|
||||
#else
|
||||
GSVector4 shift[5] =
|
||||
{
|
||||
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
|
||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
|
||||
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
|
||||
const GSVector4 GSSetupPrimCodeGenerator::m_shift[5] =
|
||||
{
|
||||
GSVector4(4.0f, 4.0f, 4.0f, 4.0f),
|
||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector4(-1.0f, 0.0f, 1.0f, 2.0f),
|
||||
GSVector4(-2.0f, -1.0f, 0.0f, 1.0f),
|
||||
GSVector4(-3.0f, -2.0f, -1.0f, 0.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(shift); ++n)
|
||||
m_shift[n] = shift[n];
|
||||
#endif
|
||||
}
|
||||
|
||||
GSSetupPrimCodeGenerator::GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
|
|
|
@ -43,8 +43,10 @@ public:
|
|||
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
static const GSVector8 m_shift[9];
|
||||
static GSVector8 m_shift[9];
|
||||
#else
|
||||
static const GSVector4 m_shift[5];
|
||||
static GSVector4 m_shift[5];
|
||||
#endif
|
||||
|
||||
static void InitVectors();
|
||||
};
|
||||
|
|
|
@ -35,58 +35,60 @@
|
|||
|
||||
const char* GSUtil::GetLibName()
|
||||
{
|
||||
// TODO: critsec
|
||||
// The following ifdef mess is courtesy of "static string str;"
|
||||
// being optimised by GCC to be unusable by older CPUs. Enjoy!
|
||||
static char name[255];
|
||||
|
||||
static string str;
|
||||
snprintf(name, sizeof(name), "GSdx "
|
||||
|
||||
if(str.empty())
|
||||
{
|
||||
str = "GSdx";
|
||||
#ifdef _WIN32
|
||||
"%lld "
|
||||
#endif
|
||||
#ifdef _M_AMD64
|
||||
"64-bit "
|
||||
#endif
|
||||
#ifdef __INTEL_COMPILER
|
||||
"(Intel C++ %d.%02d %s)",
|
||||
#elif _MSC_VER
|
||||
"(MSVC %d.%02d %s)",
|
||||
#elif __clang__
|
||||
"(clang %d.%d.%d %s)",
|
||||
#elif __GNUC__
|
||||
"(GCC %d.%d.%d %s)",
|
||||
#else
|
||||
"(%s)",
|
||||
#endif
|
||||
#ifdef _WIN32
|
||||
SVN_REV,
|
||||
#endif
|
||||
#ifdef __INTEL_COMPILER
|
||||
__INTEL_COMPILER / 100, __INTEL_COMPILER % 100,
|
||||
#elif _MSC_VER
|
||||
_MSC_VER / 100, _MSC_VER % 100,
|
||||
#elif __clang__
|
||||
__clang_major__, __clang_minor__, __clang_patchlevel__,
|
||||
#elif __GNUC__
|
||||
__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__,
|
||||
#endif
|
||||
|
||||
#ifdef _WIN32
|
||||
str += format(" %lld", SVN_REV);
|
||||
if(SVN_MODS) str += "m";
|
||||
#endif
|
||||
#if _M_SSE >= 0x501
|
||||
"AVX2"
|
||||
#elif _M_SSE >= 0x500
|
||||
"AVX"
|
||||
#elif _M_SSE >= 0x402
|
||||
"SSE4.2"
|
||||
#elif _M_SSE >= 0x401
|
||||
"SSE4.1"
|
||||
#elif _M_SSE >= 0x301
|
||||
"SSSE3"
|
||||
#elif _M_SSE >= 0x200
|
||||
"SSE2"
|
||||
#elif _M_SSE >= 0x100
|
||||
"SSE"
|
||||
#endif
|
||||
);
|
||||
|
||||
#ifdef _M_AMD64
|
||||
str += " 64-bit";
|
||||
#endif
|
||||
|
||||
list<string> sl;
|
||||
|
||||
#ifdef __INTEL_COMPILER
|
||||
sl.push_back(format("Intel C++ %d.%02d", __INTEL_COMPILER / 100, __INTEL_COMPILER % 100));
|
||||
#elif _MSC_VER
|
||||
sl.push_back(format("MSVC %d.%02d", _MSC_VER / 100, _MSC_VER % 100));
|
||||
#elif __GNUC__
|
||||
sl.push_back(format("GCC %d.%d.%d", __GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__));
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
sl.push_back("AVX2");
|
||||
#elif _M_SSE >= 0x500
|
||||
sl.push_back("AVX");
|
||||
#elif _M_SSE >= 0x402
|
||||
sl.push_back("SSE42");
|
||||
#elif _M_SSE >= 0x401
|
||||
sl.push_back("SSE41");
|
||||
#elif _M_SSE >= 0x301
|
||||
sl.push_back("SSSE3");
|
||||
#elif _M_SSE >= 0x200
|
||||
sl.push_back("SSE2");
|
||||
#elif _M_SSE >= 0x100
|
||||
sl.push_back("SSE");
|
||||
#endif
|
||||
|
||||
for(list<string>::iterator i = sl.begin(); i != sl.end(); )
|
||||
{
|
||||
if(i == sl.begin()) str += " (";
|
||||
str += *i;
|
||||
str += ++i != sl.end() ? ", " : ")";
|
||||
}
|
||||
}
|
||||
|
||||
return str.c_str();
|
||||
return name;
|
||||
}
|
||||
|
||||
static class GSUtilMaps
|
||||
|
@ -203,22 +205,31 @@ bool GSUtil::CheckSSE()
|
|||
{
|
||||
Xbyak::util::Cpu cpu;
|
||||
Xbyak::util::Cpu::Type type;
|
||||
const char* instruction_set = "";
|
||||
|
||||
#if _M_SSE >= 0x500
|
||||
#if _M_SSE >= 0x501
|
||||
type = Xbyak::util::Cpu::tAVX2;
|
||||
instruction_set = "AVX2";
|
||||
#elif _M_SSE >= 0x500
|
||||
type = Xbyak::util::Cpu::tAVX;
|
||||
instruction_set = "AVX";
|
||||
#elif _M_SSE >= 0x402
|
||||
type = Xbyak::util::Cpu::tSSE42;
|
||||
instruction_set = "SSE4.2";
|
||||
#elif _M_SSE >= 0x401
|
||||
type = Xbyak::util::Cpu::tSSE41;
|
||||
instruction_set = "SSE4.1";
|
||||
#elif _M_SSE >= 0x301
|
||||
type = Xbyak::util::Cpu::tSSSE3;
|
||||
instruction_set = "SSSE3";
|
||||
#elif _M_SSE >= 0x200
|
||||
type = Xbyak::util::Cpu::tSSE2;
|
||||
instruction_set = "SSE2";
|
||||
#endif
|
||||
|
||||
if(!cpu.has(type))
|
||||
{
|
||||
fprintf(stderr, "This CPU does not support SSE %d.%02d", _M_SSE >> 8, _M_SSE & 0xff);
|
||||
fprintf(stderr, "This CPU does not support %s\n", instruction_set);
|
||||
|
||||
return false;
|
||||
}
|
||||
|
|
|
@ -22,148 +22,196 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSVector.h"
|
||||
|
||||
const GSVector4i GSVector4i::m_xff[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
GSVector4i GSVector4i::m_xff[17];
|
||||
GSVector4i GSVector4i::m_x0f[17];
|
||||
|
||||
const GSVector4i GSVector4i::m_x0f[17] =
|
||||
void GSVector4i::InitVectors()
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
GSVector4i xff[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
|
||||
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
|
||||
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
const GSVector4 GSVector4::m_half(0.5f);
|
||||
const GSVector4 GSVector4::m_one(1.0f);
|
||||
const GSVector4 GSVector4::m_two(2.0f);
|
||||
const GSVector4 GSVector4::m_four(4.0f);
|
||||
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||
const GSVector4 GSVector4::m_x4f800000(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
|
||||
const GSVector4 GSVector4::m_max(FLT_MAX);
|
||||
const GSVector4 GSVector4::m_min(FLT_MIN);
|
||||
GSVector4i x0f[17] =
|
||||
{
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(xff); ++n)
|
||||
m_xff[n] = xff[n];
|
||||
|
||||
for (size_t n = 0; n < countof(x0f); ++n)
|
||||
m_x0f[n] = x0f[n];
|
||||
}
|
||||
|
||||
GSVector4 GSVector4::m_ps0123;
|
||||
GSVector4 GSVector4::m_ps4567;
|
||||
GSVector4 GSVector4::m_half;
|
||||
GSVector4 GSVector4::m_one;
|
||||
GSVector4 GSVector4::m_two;
|
||||
GSVector4 GSVector4::m_four;
|
||||
GSVector4 GSVector4::m_x4b000000;
|
||||
GSVector4 GSVector4::m_x4f800000;
|
||||
GSVector4 GSVector4::m_max;
|
||||
GSVector4 GSVector4::m_min;
|
||||
|
||||
void GSVector4::InitVectors()
|
||||
{
|
||||
m_ps0123 = GSVector4(0.0f, 1.0f, 2.0f, 3.0f);
|
||||
m_ps4567 = GSVector4(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
m_half = GSVector4(0.5f);
|
||||
m_one = GSVector4(1.0f);
|
||||
m_two = GSVector4(2.0f);
|
||||
m_four = GSVector4(4.0f);
|
||||
m_x4b000000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
|
||||
m_x4f800000 = GSVector4(_mm_castsi128_ps(_mm_set1_epi32(0x4f800000)));
|
||||
m_max = GSVector4(FLT_MAX);
|
||||
m_min = GSVector4(FLT_MIN);
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
const GSVector8 GSVector8::m_half(0.5f);
|
||||
const GSVector8 GSVector8::m_one(1.0f);
|
||||
const GSVector8 GSVector8::m_x7fffffff(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
|
||||
const GSVector8 GSVector8::m_x80000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
|
||||
const GSVector8 GSVector8::m_x4b000000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
|
||||
const GSVector8 GSVector8::m_x4f800000(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
|
||||
const GSVector8 GSVector8::m_max(FLT_MAX);
|
||||
const GSVector8 GSVector8::m_min(FLT_MIN);
|
||||
GSVector8 GSVector8::m_half;
|
||||
GSVector8 GSVector8::m_one;
|
||||
GSVector8 GSVector8::m_x7fffffff;
|
||||
GSVector8 GSVector8::m_x80000000;
|
||||
GSVector8 GSVector8::m_x4b000000;
|
||||
GSVector8 GSVector8::m_x4f800000;
|
||||
GSVector8 GSVector8::m_max;
|
||||
GSVector8 GSVector8::m_min;
|
||||
|
||||
void GSVector8::InitVectors()
|
||||
{
|
||||
m_half = GSVector8(0.5f);
|
||||
m_one = GSVector8(1.0f);
|
||||
m_x7fffffff = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x7fffffff)));
|
||||
m_x80000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x80000000)));
|
||||
m_x4b000000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4b000000)));
|
||||
m_x4f800000 = GSVector8(_mm256_castsi256_ps(_mm256_set1_epi32(0x4f800000)));
|
||||
m_max = GSVector8(FLT_MAX);
|
||||
m_min = GSVector8(FLT_MIN);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8i GSVector8i::m_xff[33];
|
||||
GSVector8i GSVector8i::m_x0f[33];
|
||||
|
||||
const GSVector8i GSVector8i::m_xff[33] =
|
||||
void GSVector8i::InitVectors()
|
||||
{
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
GSVector8i xff[33] =
|
||||
{
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector8i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
|
||||
const GSVector8i GSVector8i::m_x0f[33] =
|
||||
{
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
GSVector8i x0f[33] =
|
||||
{
|
||||
GSVector8i(0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector8i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(xff); ++n)
|
||||
m_xff[n] = xff[n];
|
||||
|
||||
for (size_t n = 0; n < countof(x0f); ++n)
|
||||
m_x0f[n] = x0f[n];
|
||||
}
|
||||
#endif
|
||||
|
||||
GSVector4i GSVector4i::fit(int arx, int ary) const
|
||||
|
|
|
@ -92,8 +92,8 @@ class GSVector8i;
|
|||
|
||||
class alignas(16) GSVector4i
|
||||
{
|
||||
static const GSVector4i m_xff[17];
|
||||
static const GSVector4i m_x0f[17];
|
||||
static GSVector4i m_xff[17];
|
||||
static GSVector4i m_x0f[17];
|
||||
|
||||
public:
|
||||
union
|
||||
|
@ -114,6 +114,8 @@ public:
|
|||
__m128i m;
|
||||
};
|
||||
|
||||
static void InitVectors();
|
||||
|
||||
__forceinline GSVector4i()
|
||||
{
|
||||
}
|
||||
|
@ -2442,16 +2444,18 @@ public:
|
|||
__m128 m;
|
||||
};
|
||||
|
||||
static const GSVector4 m_ps0123;
|
||||
static const GSVector4 m_ps4567;
|
||||
static const GSVector4 m_half;
|
||||
static const GSVector4 m_one;
|
||||
static const GSVector4 m_two;
|
||||
static const GSVector4 m_four;
|
||||
static const GSVector4 m_x4b000000;
|
||||
static const GSVector4 m_x4f800000;
|
||||
static const GSVector4 m_max;
|
||||
static const GSVector4 m_min;
|
||||
static GSVector4 m_ps0123;
|
||||
static GSVector4 m_ps4567;
|
||||
static GSVector4 m_half;
|
||||
static GSVector4 m_one;
|
||||
static GSVector4 m_two;
|
||||
static GSVector4 m_four;
|
||||
static GSVector4 m_x4b000000;
|
||||
static GSVector4 m_x4f800000;
|
||||
static GSVector4 m_max;
|
||||
static GSVector4 m_min;
|
||||
|
||||
static void InitVectors();
|
||||
|
||||
__forceinline GSVector4()
|
||||
{
|
||||
|
@ -3343,8 +3347,8 @@ GSVector.h:2973:15: error: shadows template parm 'int i'
|
|||
|
||||
class alignas(32) GSVector8i
|
||||
{
|
||||
static const GSVector8i m_xff[33];
|
||||
static const GSVector8i m_x0f[33];
|
||||
static GSVector8i m_xff[33];
|
||||
static GSVector8i m_x0f[33];
|
||||
|
||||
public:
|
||||
union
|
||||
|
@ -3365,6 +3369,8 @@ public:
|
|||
__m128i m0, m1;
|
||||
};
|
||||
|
||||
static void InitVectors();
|
||||
|
||||
__forceinline GSVector8i() {}
|
||||
|
||||
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
|
||||
|
@ -5154,14 +5160,16 @@ public:
|
|||
__m128 m0, m1;
|
||||
};
|
||||
|
||||
static const GSVector8 m_half;
|
||||
static const GSVector8 m_one;
|
||||
static const GSVector8 m_x7fffffff;
|
||||
static const GSVector8 m_x80000000;
|
||||
static const GSVector8 m_x4b000000;
|
||||
static const GSVector8 m_x4f800000;
|
||||
static const GSVector8 m_max;
|
||||
static const GSVector8 m_min;
|
||||
static GSVector8 m_half;
|
||||
static GSVector8 m_one;
|
||||
static GSVector8 m_x7fffffff;
|
||||
static GSVector8 m_x80000000;
|
||||
static GSVector8 m_x4b000000;
|
||||
static GSVector8 m_x4f800000;
|
||||
static GSVector8 m_max;
|
||||
static GSVector8 m_min;
|
||||
|
||||
static void InitVectors();
|
||||
|
||||
__forceinline GSVector8()
|
||||
{
|
||||
|
|
|
@ -24,7 +24,12 @@
|
|||
#include "GSUtil.h"
|
||||
#include "GSState.h"
|
||||
|
||||
const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
|
||||
GSVector4 GSVertexTrace::s_minmax;
|
||||
|
||||
void GSVertexTrace::InitVectors()
|
||||
{
|
||||
s_minmax = GSVector4(FLT_MAX, -FLT_MAX);
|
||||
}
|
||||
|
||||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_state(state)
|
||||
|
|
|
@ -38,7 +38,7 @@ public:
|
|||
protected:
|
||||
const GSState* m_state;
|
||||
|
||||
static const GSVector4 s_minmax;
|
||||
static GSVector4 s_minmax;
|
||||
|
||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
||||
|
||||
|
@ -69,6 +69,8 @@ public:
|
|||
GSVector2 m_lod; // x = min, y = max
|
||||
|
||||
public:
|
||||
static void InitVectors();
|
||||
|
||||
GSVertexTrace(const GSState* state);
|
||||
virtual ~GSVertexTrace() {}
|
||||
|
||||
|
|
|
@ -127,6 +127,21 @@ GSdxApp theApp;
|
|||
|
||||
// Constructs the application object. On Windows the constructor runs Init()
// itself; on every other platform it does nothing and Init() has to be called
// explicitly (GPUinit() and GPUconfigure() do so).
GSdxApp::GSdxApp()
|
||||
{
|
||||
// An empty constructor causes an illegal instruction exception on an SSE4.2 machine on Windows.
|
||||
// A non-empty one doesn't, but it raises a SIGILL signal when compiled with GCC 6.1.1.
|
||||
// Compromise: only call Init() from the constructor on Windows.
|
||||
#ifdef _WIN32
|
||||
Init();
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSdxApp::Init()
|
||||
{
|
||||
static bool is_initialised = false;
|
||||
if (is_initialised)
|
||||
return;
|
||||
is_initialised = true;
|
||||
|
||||
m_ini = "inis/GSdx.ini";
|
||||
m_section = "Settings";
|
||||
|
||||
|
|
|
@ -35,7 +35,8 @@ class GSdxApp
|
|||
public:
|
||||
GSdxApp();
|
||||
|
||||
void* GetModuleHandlePtr();
|
||||
void Init();
|
||||
void* GetModuleHandlePtr();
|
||||
|
||||
#ifdef _WIN32
|
||||
HMODULE GetModuleHandle() {return (HMODULE)GetModuleHandlePtr();}
|
||||
|
|
|
@ -57,8 +57,33 @@ EXPORT_C_(uint32) PSEgetLibVersion()
|
|||
return version << 16 | revision << 8 | PLUGIN_VERSION;
|
||||
}
|
||||
|
||||
static void InitVectors()
|
||||
{
|
||||
GSVector4i::InitVectors();
|
||||
GSVector4::InitVectors();
|
||||
#if _M_SSE >= 0x500
|
||||
GSVector8::InitVectors();
|
||||
#endif
|
||||
#if _M_SSE >= 0x501
|
||||
GSVector8i::InitVectors();
|
||||
#endif
|
||||
|
||||
GPUDrawScanlineCodeGenerator::InitVectors();
|
||||
GPULocalMemory::InitVectors();
|
||||
GPUSetupPrimCodeGenerator::InitVectors();
|
||||
}
|
||||
|
||||
// Plugin initialisation entry point.
// Returns 0 after initialising the application object and the vector tables,
// or -1 (without initialising anything) when GSUtil::CheckSSE() fails.
EXPORT_C_(int32) GPUinit()
{
	if(GSUtil::CheckSSE())
	{
		theApp.Init();

		InitVectors();

		return 0;
	}

	return -1;
}
|
||||
|
||||
|
@ -140,6 +165,13 @@ EXPORT_C_(int32) GPUopen(void* hWnd)
|
|||
|
||||
EXPORT_C_(int32) GPUconfigure()
|
||||
{
|
||||
if(!GSUtil::CheckSSE())
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
theApp.Init();
|
||||
|
||||
#ifdef _WIN32
|
||||
|
||||
GPUSettingsDlg dlg;
|
||||
|
@ -160,6 +192,11 @@ EXPORT_C_(int32) GPUconfigure()
|
|||
|
||||
// Plugin self-test entry point: 0 when GSUtil::CheckSSE() succeeds, -1 otherwise.
EXPORT_C_(int32) GPUtest()
{
	return GSUtil::CheckSSE() ? 0 : -1;
}
|
||||
|
||||
|
|
|
@ -1010,18 +1010,8 @@ void GPUDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm&
|
|||
movdqa(a, b);
|
||||
}
|
||||
|
||||
const GSVector4i GPUDrawScanlineCodeGenerator::m_test[8] =
|
||||
{
|
||||
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000),
|
||||
GSVector4i::zero(),
|
||||
};
|
||||
|
||||
GSVector4i GPUDrawScanlineCodeGenerator::m_test[8];
|
||||
alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
|
||||
{
|
||||
{7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1, 7, 0, 6, 1},
|
||||
|
@ -1029,3 +1019,21 @@ alignas(32) const uint16_t GPUDrawScanlineCodeGenerator::m_dither[4][16] =
|
|||
{1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7, 1, 6, 0, 7},
|
||||
{4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2, 4, 3, 5, 2},
|
||||
};
|
||||
|
||||
void GPUDrawScanlineCodeGenerator::InitVectors()
|
||||
{
|
||||
GSVector4i test[8] =
|
||||
{
|
||||
GSVector4i(0xffff0000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0xffff0000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffffffff, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0xffff0000, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffffffff),
|
||||
GSVector4i(0x00000000, 0x00000000, 0x00000000, 0xffff0000),
|
||||
GSVector4i::zero(),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(test); ++n)
|
||||
m_test[n] = test[n];
|
||||
}
|
||||
|
|
|
@ -55,6 +55,8 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
public:
|
||||
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static GSVector4i m_test[8];
|
||||
alignas(32) static const uint16 m_dither[4][16];
|
||||
|
||||
static void InitVectors();
|
||||
};
|
||||
|
|
|
@ -23,15 +23,23 @@
|
|||
#include "GPULocalMemory.h"
|
||||
#include "GSdx.h"
|
||||
|
||||
const GSVector4i GPULocalMemory::m_xxxa(0x00008000);
|
||||
const GSVector4i GPULocalMemory::m_xxbx(0x00007c00);
|
||||
const GSVector4i GPULocalMemory::m_xgxx(0x000003e0);
|
||||
const GSVector4i GPULocalMemory::m_rxxx(0x0000001f);
|
||||
GSVector4i GPULocalMemory::m_xxxa;
|
||||
GSVector4i GPULocalMemory::m_xxbx;
|
||||
GSVector4i GPULocalMemory::m_xgxx;
|
||||
GSVector4i GPULocalMemory::m_rxxx;
|
||||
|
||||
#define VM_REAL_SIZE ((1 << (12 + 11)) * sizeof(uint16))
|
||||
#define VM_ALLOC_SIZE (VM_REAL_SIZE * 2)
|
||||
#define TEX_ALLOC_SIZE (256 * 256 * (1 + 1 + 4) * 32)
|
||||
|
||||
void GPULocalMemory::InitVectors()
|
||||
{
|
||||
m_xxxa = GSVector4i(0x00008000);
|
||||
m_xxbx = GSVector4i(0x00007c00);
|
||||
m_xgxx = GSVector4i(0x000003e0);
|
||||
m_rxxx = GSVector4i(0x0000001f);
|
||||
}
|
||||
|
||||
GPULocalMemory::GPULocalMemory()
|
||||
{
|
||||
m_scale.x = std::min<int>(std::max<int>(theApp.GetConfigI("scale_x"), 0), 2);
|
||||
|
|
|
@ -26,10 +26,10 @@
|
|||
|
||||
class GPULocalMemory
|
||||
{
|
||||
static const GSVector4i m_xxxa;
|
||||
static const GSVector4i m_xxbx;
|
||||
static const GSVector4i m_xgxx;
|
||||
static const GSVector4i m_rxxx;
|
||||
static GSVector4i m_xxxa;
|
||||
static GSVector4i m_xxbx;
|
||||
static GSVector4i m_xgxx;
|
||||
static GSVector4i m_rxxx;
|
||||
|
||||
uint16* m_vm;
|
||||
|
||||
|
@ -50,6 +50,8 @@ class GPULocalMemory
|
|||
GSVector2i m_scale;
|
||||
|
||||
public:
|
||||
static void InitVectors();
|
||||
|
||||
GPULocalMemory();
|
||||
virtual ~GPULocalMemory();
|
||||
|
||||
|
|
|
@ -220,9 +220,17 @@ void GPUSetupPrimCodeGenerator::Generate()
|
|||
ret();
|
||||
}
|
||||
|
||||
const GSVector4 GPUSetupPrimCodeGenerator::m_shift[3] =
|
||||
GSVector4 GPUSetupPrimCodeGenerator::m_shift[3];
|
||||
|
||||
void GPUSetupPrimCodeGenerator::InitVectors()
|
||||
{
|
||||
GSVector4(8.0f, 8.0f, 8.0f, 8.0f),
|
||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector4(4.0f, 5.0f, 6.0f, 7.0f),
|
||||
};
|
||||
GSVector4 shift[3] =
|
||||
{
|
||||
GSVector4(8.0f, 8.0f, 8.0f, 8.0f),
|
||||
GSVector4(0.0f, 1.0f, 2.0f, 3.0f),
|
||||
GSVector4(4.0f, 5.0f, 6.0f, 7.0f),
|
||||
};
|
||||
|
||||
for (size_t n = 0; n < countof(shift); ++n)
|
||||
m_shift[n] = shift[n];
|
||||
}
|
||||
|
|
|
@ -36,5 +36,7 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
public:
|
||||
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||
|
||||
static const GSVector4 m_shift[3];
|
||||
static GSVector4 m_shift[3];
|
||||
|
||||
static void InitVectors();
|
||||
};
|
Loading…
Reference in New Issue