GSdx: re-implemented the drawing pipeline in c++, just for reference and easier debugging.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4972 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-11-25 23:48:59 +00:00
parent 6cf12e5721
commit 9d54677055
13 changed files with 1469 additions and 72 deletions

View File

@ -80,3 +80,32 @@ void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats, uint64 frame)
{ {
m_ds_map.UpdateStats(stats, frame); m_ds_map.UpdateStats(stats, frame);
} }
void GPUDrawScanline::PrintStats()
{
m_ds_map.PrintStats();
}
#ifndef JIT_DRAW
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
{
// TODO
}
void GPUDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
{
// TODO
}
void GPUDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
{
// TODO
}
void GPUDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
{
// TODO
}
#endif

View File

@ -43,5 +43,14 @@ public:
void BeginDraw(const void* param); void BeginDraw(const void* param);
void EndDraw(const GSRasterizerStats& stats, uint64 frame); void EndDraw(const GSRasterizerStats& stats, uint64 frame);
void PrintStats() {m_ds_map.PrintStats();} void PrintStats();
#ifndef JIT_DRAW
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
#endif
}; };

File diff suppressed because it is too large Load Diff

View File

@ -35,8 +35,6 @@ class GSDrawScanline : public IDrawScanline
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map; GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map; GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
template<class T, bool masked> template<class T, bool masked>
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m); void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
@ -54,5 +52,21 @@ public:
void BeginDraw(const void* param); void BeginDraw(const void* param);
void EndDraw(const GSRasterizerStats& stats, uint64 frame); void EndDraw(const GSRasterizerStats& stats, uint64 frame);
void PrintStats() {m_ds_map.PrintStats();} void PrintStats();
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
#ifndef JIT_DRAW
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
bool IsEdge() const {return m_global.sel.aa1;}
bool IsRect() const {return m_global.sel.IsSolidRect();}
bool TestAlpha(GSVector4i& test, GSVector4i& fm, GSVector4i& zm, const GSVector4i& ga);
void WritePixel(const GSVector4i& src, int addr, int i, uint32 psm);
#endif
}; };

View File

@ -30,9 +30,6 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
{ {
void operator = (const GSDrawScanlineCodeGenerator&); void operator = (const GSDrawScanlineCodeGenerator&);
static const GSVector4i m_test[8];
static const GSVector4 m_log2_coef[4];
GSScanlineSelector m_sel; GSScanlineSelector m_sel;
GSScanlineLocalData& m_local; GSScanlineLocalData& m_local;
@ -84,4 +81,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
public: public:
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
static const GSVector4i m_test[8];
static const GSVector4 m_log2_coef[4];
}; };

View File

@ -1648,8 +1648,8 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
if(m_sel.colclamp == 0) if(m_sel.colclamp == 0)
{ {
// c[0] &= 0x000000ff; // c[0] &= 0x00ff00ff;
// c[1] &= 0x000000ff; // c[1] &= 0x00ff00ff;
vpcmpeqd(xmm15, xmm15); vpcmpeqd(xmm15, xmm15);
vpsrlw(xmm15, 8); vpsrlw(xmm15, 8);

View File

@ -343,6 +343,8 @@ void GSDrawScanlineCodeGenerator::Init()
if(m_sel.edge) if(m_sel.edge)
{ {
// m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9);
vpshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); vpshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
vpshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3)); vpshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3));
vpsrlw(xmm3, 9); vpsrlw(xmm3, 9);
@ -1184,34 +1186,35 @@ return;
vmovq(xmm4, ptr[&m_local.gd->t.minmax]); vmovq(xmm4, ptr[&m_local.gd->t.minmax]);
vmovq(xmm2, ptr[&m_local.temp.uv[0].u32[0]]); vmovdqa(xmm2, ptr[&m_local.temp.uv[0]]);
vmovdqa(xmm5, xmm2);
vmovdqa(xmm3, ptr[&m_local.temp.uv[1]]);
vmovdqa(xmm6, xmm3);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]);
vpsrad(xmm2, xmm0); vpsrad(xmm2, xmm0);
vpsrlw(xmm1, xmm4, xmm0); vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1); vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1);
vmovq(xmm3, ptr[&m_local.temp.uv[0].u32[2]]);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]); vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]);
vpsrad(xmm3, xmm0); vpsrad(xmm5, xmm0);
vpsrlw(xmm1, xmm4, xmm0); vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1); vmovq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1);
vmovq(xmm5, ptr[&m_local.temp.uv[1].u32[0]]);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]); vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]);
vpsrad(xmm5, xmm0); vpsrad(xmm3, xmm0);
vpsrlw(xmm1, xmm4, xmm0); vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1); vmovq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1);
vmovq(xmm6, ptr[&m_local.temp.uv[1].u32[2]]);
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]); vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]);
vpsrad(xmm6, xmm0); vpsrad(xmm6, xmm0);
vpsrlw(xmm1, xmm4, xmm0); vpsrlw(xmm1, xmm4, xmm0);
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1); vmovq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1);
vpunpckldq(xmm2, xmm3); vpunpckldq(xmm2, xmm3);
vpunpckldq(xmm5, xmm6); vpunpckhdq(xmm5, xmm6);
vpunpckhqdq(xmm3, xmm2, xmm5); vpunpckhdq(xmm3, xmm2, xmm5);
vpunpcklqdq(xmm2, xmm5); vpunpckldq(xmm2, xmm5);
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2); vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3); vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
@ -2573,8 +2576,8 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
if(m_sel.colclamp == 0) if(m_sel.colclamp == 0)
{ {
// c[0] &= 0x000000ff; // c[0] &= 0x00ff00ff;
// c[1] &= 0x000000ff; // c[1] &= 0x00ff00ff;
vpcmpeqd(xmm7, xmm7); vpcmpeqd(xmm7, xmm7);
vpsrlw(xmm7, 8); vpsrlw(xmm7, 8);

View File

@ -343,6 +343,8 @@ void GSDrawScanlineCodeGenerator::Init()
if(m_sel.edge) if(m_sel.edge)
{ {
// m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9);
pshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2)); pshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
pshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3)); pshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3));
psrlw(xmm3, 9); psrlw(xmm3, 9);
@ -1232,28 +1234,29 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
movq(xmm4, ptr[&m_local.gd->t.minmax]); movq(xmm4, ptr[&m_local.gd->t.minmax]);
movq(xmm2, ptr[&m_local.temp.uv[0].u32[0]]); movdqa(xmm2, ptr[&m_local.temp.uv[0]]);
movdqa(xmm5, xmm2);
movdqa(xmm3, ptr[&m_local.temp.uv[1]]);
movdqa(xmm6, xmm3);
movd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]); movd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]);
psrad(xmm2, xmm0); psrad(xmm2, xmm0);
movdqa(xmm1, xmm4); movdqa(xmm1, xmm4);
psrlw(xmm1, xmm0); psrlw(xmm1, xmm0);
movq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1); movq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1);
movq(xmm3, ptr[&m_local.temp.uv[0].u32[2]]);
movd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]); movd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]);
psrad(xmm3, xmm0); psrad(xmm5, xmm0);
movdqa(xmm1, xmm4); movdqa(xmm1, xmm4);
psrlw(xmm1, xmm0); psrlw(xmm1, xmm0);
movq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1); movq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1);
movq(xmm5, ptr[&m_local.temp.uv[1].u32[0]]);
movd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]); movd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]);
psrad(xmm5, xmm0); psrad(xmm3, xmm0);
movdqa(xmm1, xmm4); movdqa(xmm1, xmm4);
psrlw(xmm1, xmm0); psrlw(xmm1, xmm0);
movq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1); movq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1);
movq(xmm6, ptr[&m_local.temp.uv[1].u32[2]]);
movd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]); movd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]);
psrad(xmm6, xmm0); psrad(xmm6, xmm0);
movdqa(xmm1, xmm4); movdqa(xmm1, xmm4);
@ -1261,10 +1264,10 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
movq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1); movq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1);
punpckldq(xmm2, xmm3); punpckldq(xmm2, xmm3);
punpckldq(xmm5, xmm6); punpckhdq(xmm5, xmm6);
movdqa(xmm3, xmm2); movdqa(xmm3, xmm2);
punpcklqdq(xmm2, xmm5); punpckldq(xmm2, xmm5);
punpckhqdq(xmm3, xmm5); punpckhdq(xmm3, xmm5);
movdqa(ptr[&m_local.temp.uv[0]], xmm2); movdqa(ptr[&m_local.temp.uv[0]], xmm2);
movdqa(ptr[&m_local.temp.uv[1]], xmm3); movdqa(ptr[&m_local.temp.uv[1]], xmm3);

View File

@ -277,6 +277,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
int i = (y0011 == y1221).mask() & 7; int i = (y0011 == y1221).mask() & 7;
// if(i == 0) => y0 < y1 < y2
// if(i == 1) => y0 == y1 < y2
// if(i == 4) => y0 < y1 == y2
if(i == 7) return; // y0 == y1 == y2 if(i == 7) return; // y0 == y1 == y2
GSVector4 tbf = y0011.xzxz(y1221).ceil(); GSVector4 tbf = y0011.xzxz(y1221).ceil();
@ -338,14 +342,25 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0 dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1 dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
GSVector4 x0; if(i & 1)
switch(i)
{ {
case 0: // y0 < y1 < y2 if(tb.y < tb.w)
case 4: // y0 < y1 == y2 {
edge = v[1 - j];
x0 = v[0].p.xxxx(); GSVector4 dy = tbmax.xxxx() - edge.p.yyyy();
edge.p = edge.p.insert<0, 1>(v[j].p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
edge += dedge * dy;
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p.xxxx());
}
}
else
{
GSVector4 x0 = v[0].p.xxxx();
if(tb.x < tb.z) if(tb.x < tb.z)
{ {
@ -374,30 +389,6 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p.xxxx()); DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p.xxxx());
} }
break;
case 1: // y0 == y1 < y2
if(tb.y < tb.w)
{
edge = v[1 - j];
GSVector4 dy = tbmax.xxxx() - edge.p.yyyy();
edge.p = edge.p.insert<0, 1>(v[j].p);
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
edge += dedge * dy;
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p.xxxx());
}
break;
default:
__assume(0);
} }
Flush(v, dscan); Flush(v, dscan);

View File

@ -27,6 +27,9 @@
#include "GSThread.h" #include "GSThread.h"
#include "GSAlignedClass.h" #include "GSAlignedClass.h"
//
#define JIT_DRAW
__aligned(class, 32) GSRasterizerData __aligned(class, 32) GSRasterizerData
{ {
public: public:
@ -62,11 +65,22 @@ public:
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0; virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
virtual void PrintStats() = 0; virtual void PrintStats() = 0;
#ifdef JIT_DRAW
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);} __forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);} __forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);} __forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);} __forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
#else
virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
#endif
__forceinline bool IsEdge() const {return m_de != NULL;} __forceinline bool IsEdge() const {return m_de != NULL;}
__forceinline bool IsRect() const {return m_dr != NULL;} __forceinline bool IsRect() const {return m_dr != NULL;}
}; };

View File

@ -146,8 +146,9 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has
struct struct
{ {
GSVector4i z, f; GSVector4 z;
GSVector4i s, t, q; GSVector4i f;
GSVector4 s, t, q;
GSVector4i rb, ga; GSVector4i rb, ga;
GSVector4i zs, zd; GSVector4i zs, zd;
GSVector4i uf, vf; GSVector4i uf, vf;

View File

@ -28,8 +28,6 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
{ {
void operator = (const GSSetupPrimCodeGenerator&); void operator = (const GSSetupPrimCodeGenerator&);
static const GSVector4 m_shift[5];
GSScanlineSelector m_sel; GSScanlineSelector m_sel;
GSScanlineLocalData& m_local; GSScanlineLocalData& m_local;
@ -43,4 +41,6 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
public: public:
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize); GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
static const GSVector4 m_shift[5];
}; };

View File

@ -151,7 +151,7 @@ public:
this->m = m; this->m = m;
} }
__forceinline explicit GSVector4i(const GSVector4& v); __forceinline explicit GSVector4i(const GSVector4& v, bool truncate = true);
__forceinline void operator = (const GSVector4i& v) __forceinline void operator = (const GSVector4i& v)
{ {
@ -796,41 +796,81 @@ public:
return GSVector4i(_mm_srai_epi16(m, i)); return GSVector4i(_mm_srai_epi16(m, i));
} }
__forceinline GSVector4i sra16(__m128i i) const
{
return GSVector4i(_mm_sra_epi16(m, i));
}
__forceinline GSVector4i sra32(int i) const __forceinline GSVector4i sra32(int i) const
{ {
return GSVector4i(_mm_srai_epi32(m, i)); return GSVector4i(_mm_srai_epi32(m, i));
} }
__forceinline GSVector4i sra32(__m128i i) const
{
return GSVector4i(_mm_sra_epi32(m, i));
}
__forceinline GSVector4i sll16(int i) const __forceinline GSVector4i sll16(int i) const
{ {
return GSVector4i(_mm_slli_epi16(m, i)); return GSVector4i(_mm_slli_epi16(m, i));
} }
__forceinline GSVector4i sll16(__m128i i) const
{
return GSVector4i(_mm_sll_epi16(m, i));
}
__forceinline GSVector4i sll32(int i) const __forceinline GSVector4i sll32(int i) const
{ {
return GSVector4i(_mm_slli_epi32(m, i)); return GSVector4i(_mm_slli_epi32(m, i));
} }
__forceinline GSVector4i sll32(__m128i i) const
{
return GSVector4i(_mm_sll_epi32(m, i));
}
__forceinline GSVector4i sll64(int i) const __forceinline GSVector4i sll64(int i) const
{ {
return GSVector4i(_mm_slli_epi64(m, i)); return GSVector4i(_mm_slli_epi64(m, i));
} }
__forceinline GSVector4i sll64(__m128i i) const
{
return GSVector4i(_mm_sll_epi64(m, i));
}
__forceinline GSVector4i srl16(int i) const __forceinline GSVector4i srl16(int i) const
{ {
return GSVector4i(_mm_srli_epi16(m, i)); return GSVector4i(_mm_srli_epi16(m, i));
} }
__forceinline GSVector4i srl16(__m128i i) const
{
return GSVector4i(_mm_srl_epi16(m, i));
}
__forceinline GSVector4i srl32(int i) const __forceinline GSVector4i srl32(int i) const
{ {
return GSVector4i(_mm_srli_epi32(m, i)); return GSVector4i(_mm_srli_epi32(m, i));
} }
__forceinline GSVector4i srl32(__m128i i) const
{
return GSVector4i(_mm_srl_epi32(m, i));
}
__forceinline GSVector4i srl64(int i) const __forceinline GSVector4i srl64(int i) const
{ {
return GSVector4i(_mm_srli_epi64(m, i)); return GSVector4i(_mm_srli_epi64(m, i));
} }
__forceinline GSVector4i srl64(__m128i i) const
{
return GSVector4i(_mm_srl_epi64(m, i));
}
__forceinline GSVector4i add8(const GSVector4i& v) const __forceinline GSVector4i add8(const GSVector4i& v) const
{ {
return GSVector4i(_mm_add_epi8(m, v.m)); return GSVector4i(_mm_add_epi8(m, v.m));
@ -3109,9 +3149,9 @@ public:
VECTOR4_SHUFFLE_1(w, 3) VECTOR4_SHUFFLE_1(w, 3)
}; };
__forceinline GSVector4i::GSVector4i(const GSVector4& v) __forceinline GSVector4i::GSVector4i(const GSVector4& v, bool truncate)
{ {
m = _mm_cvttps_epi32(v); m = truncate ? _mm_cvttps_epi32(v) : _mm_cvtps_epi32(v);
} }
__forceinline GSVector4::GSVector4(const GSVector4i& v) __forceinline GSVector4::GSVector4(const GSVector4i& v)
@ -3158,7 +3198,7 @@ public:
__forceinline GSVector8i() {} __forceinline GSVector8i() {}
__forceinline explicit GSVector8i(const GSVector8& v); __forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
static GSVector8i cast(const GSVector8& v); static GSVector8i cast(const GSVector8& v);
@ -4158,9 +4198,9 @@ public:
#if _M_SSE >= 0x500 #if _M_SSE >= 0x500
__forceinline GSVector8i::GSVector8i(const GSVector8& v) __forceinline GSVector8i::GSVector8i(const GSVector8& v, bool truncate)
{ {
m = _mm256_cvttps_epi32(v); m = truncate ? _mm256_cvttps_epi32(v) : _mm256_cvtps_epi32(v);
} }
__forceinline GSVector8::GSVector8(const GSVector8i& v) __forceinline GSVector8::GSVector8(const GSVector8i& v)
@ -4180,10 +4220,10 @@ __forceinline GSVector8 GSVector8::cast(const GSVector8i& v)
#else #else
__forceinline GSVector8i::GSVector8i(const GSVector8& v) __forceinline GSVector8i::GSVector8i(const GSVector8& v, bool truncate)
{ {
m[0] = _mm_cvttps_epi32(v.m[0]); m[0] = truncate ? _mm_cvttps_epi32(v.m[0]) : _mm_cvtps_epi32(v.m[0]);
m[1] = _mm_cvttps_epi32(v.m[1]); m[1] = truncate ? _mm_cvttps_epi32(v.m[1]) : _mm_cvtps_epi32(v.m[1]);
} }
__forceinline GSVector8::GSVector8(const GSVector8i& v) __forceinline GSVector8::GSVector8(const GSVector8i& v)