GSdx: only minor changes

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@4494 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2011-03-27 03:12:12 +00:00
parent c2266c3edc
commit 8ca01f4b77
10 changed files with 225 additions and 151 deletions

View File

@ -100,7 +100,7 @@ protected:
r.right = r.left + 256;
r.bottom = r.top + 256;
Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r).c_str(), m_env.STATUS.TP, r, false);
Dump(format("da_%d_%d_%d_%d_%d", m_env.STATUS.TP, r.left, r.top, r.right, r.bottom).c_str(), m_env.STATUS.TP, r, false);
}
*/

View File

@ -610,20 +610,23 @@ int GPUState::PH_Read(GPUReg* r, int size)
int w = r[2].XY.X;
int h = r[2].XY.Y;
GSVector4i r2;
if(w > 0 && h > 0)
{
GSVector4i r2;
r2.left = r[1].XY.X;
r2.top = r[1].XY.Y;
r2.right = r2.left + w;
r2.bottom = r2.top + h;
r2.left = r[1].XY.X;
r2.top = r[1].XY.Y;
r2.right = r2.left + w;
r2.bottom = r2.top + h;
m_read.bytes = ((w * h + 1) & ~1) * 2;
m_read.cur = 0;
m_read.Reserve(m_read.bytes);
m_read.bytes = ((w * h + 1) & ~1) * 2;
m_read.cur = 0;
m_read.Reserve(m_read.bytes);
m_mem.ReadRect(r2, (uint16*)m_read.buff);
m_mem.ReadRect(r2, (uint16*)m_read.buff);
Dump("r");
Dump("r");
}
m_env.STATUS.IMG = 1;

View File

@ -2764,42 +2764,33 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
if(m_sel.mmin)
{
int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
if(pixels == 4)
{
vmovdqa(ptr[&m_local.temp.test], xmm7);
}
for(int j = 0; j < 4; j++)
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < pixels; i++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < 4; i++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
if(pixels == 4)
{
vmovdqa(xmm5, xmm7);
vmovdqa(xmm7, ptr[&m_local.temp.test]);
}
else
{
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, j);
}
}
}
else
{
int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
// TODO: might be faster to read in columns, inserts into the same register would be further from each other (last one overwrites xmm5, need to use xmm7)
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
for(int i = 0; i < pixels; i++)
{

View File

@ -2928,36 +2928,29 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
{
#if _M_SSE >= 0x401
int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
const int r[] = {5, 6, 2, 4, 0, 1, 3, 7};
if(pixels == 4)
{
movdqa(ptr[&m_local.temp.test], xmm7);
}
for(int j = 0; j < 4; j++)
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < pixels; i++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
for(int i = 0; i < 4; i++)
{
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
}
}
if(pixels == 4)
{
movdqa(xmm5, xmm7);
movdqa(xmm7, ptr[&m_local.temp.test]);
}
else
{
for(int j = 0; j < 4; j++)
{
mov(ebx, ptr[&lod_i->u32[j]]);
mov(ebx, ptr[edx + ebx * sizeof(void*) + mip_offset]);
ReadTexel(xmm6, xmm5, j);
}
}
#else
@ -3082,12 +3075,10 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
}
else
{
int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
#if _M_SSE >= 0x401
// TODO: might be faster to read in columns, inserts into the same register would be further from each other (last one overwrites xmm5, need to use xmm7)
for(int i = 0; i < pixels; i++)
{
for(int j = 0; j < 4; j++)
@ -3098,7 +3089,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
#else
int t[] = {1, 4, 1, 5, 2, 5, 2, 0};
const int t[] = {1, 4, 1, 5, 2, 5, 2, 0};
for(int i = 0; i < pixels; i++)
{

View File

@ -86,7 +86,8 @@ void GSRasterizer::Draw(const GSRasterizerData* data)
{
case GS_POINT_CLASS:
m_stats.prims = count;
for(int i = 0; i < count; i++) DrawPoint(&vertices[i]);
if(data->scissor_test) DrawPoint<true>(vertices, count);
else DrawPoint<false>(vertices, count);
break;
case GS_LINE_CLASS:
ASSERT(!(count & 1));
@ -117,19 +118,23 @@ void GSRasterizer::GetStats(GSRasterizerStats& stats)
stats = m_stats;
}
void GSRasterizer::DrawPoint(const GSVertexSW* v)
template<bool scissor_test>
void GSRasterizer::DrawPoint(const GSVertexSW* v, int count)
{
GSVector4i p(v->p);
if(m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
for(; count > 0; count--, v++)
{
if(IsOneOfMyScanlines(p.y))
GSVector4i p(v->p);
if(!scissor_test || m_scissor.left <= p.x && p.x < m_scissor.right && m_scissor.top <= p.y && p.y < m_scissor.bottom)
{
m_stats.pixels++;
if(IsOneOfMyScanlines(p.y))
{
m_stats.pixels++;
m_ds->SetupPrim(v, *v);
m_ds->SetupPrim(v, *v);
m_ds->DrawScanline(1, p.x, p.y, *v);
m_ds->DrawScanline(1, p.x, p.y, *v);
}
}
}
}
@ -206,11 +211,7 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
{
if(IsOneOfMyScanlines(p.y))
{
*e = edge;
e->p.i16[0] = (int16)p.x;
e->p.i16[1] = (int16)p.y;
e->p.i16[2] = 1;
AddScanline(e, 1, p.x, p.y, edge);
e++;
}
@ -243,8 +244,6 @@ static const int s_abc[8][4] =
void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
{
// TODO: GSVertexSW::c/t could be merged into a GSVector8
GSVertexSW v[4];
GSVertexSW dv[3];
GSVertexSW ddv[3];
@ -338,7 +337,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.xxzw(); // r.x => l.y
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
dl.p = dl.p.insert<0, 1>(ddv[1 - j].p); // dr.x => dl.y
l += dl * dy;
@ -351,13 +350,13 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
// v[3] isn't accurate enough, it may leave gaps horizontally if it happens to be on the left side of the triangle
// example: previous triangle's scanline ends on 48.9999, this one's starts from 49.0001, the pixel at 49 isn't drawn
GSVertexSW l = v[1 + (1 << j)];
GSVertexSW l = v[1 + (j << 1)];
GSVertexSW dl = ddv[2 - j];
GSVector4 dy = tbmax.zzzz() - l.p.yyyy();
l.p = l.p.upl(v[3 - (1 << j)].p).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(ddv[1 + j].p).xyzw(dl.p); // dr.x => dl.y
l.p = l.p.insert<0, 1>(v[3 - (j << 1)].p); // r.x => l.y
dl.p = dl.p.insert<0, 1>(ddv[1 + j].p); // dr.x => dl.y
l += dl * dy;
@ -375,8 +374,8 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.upl(v[1 - j].p).xyzw(l.p); // r.x => l.y
dl.p = dl.p.upl(ddv[2 - j].p).xyzw(dl.p); // dr.x => dl.y
l.p = l.p.insert<0, 1>(v[1 - j].p); // r.x => l.y
dl.p = dl.p.insert<0, 1>(ddv[2 - j].p); // dr.x => dl.y
l += dl * dy;
@ -395,7 +394,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
GSVector4 dy = tbmax.xxxx() - l.p.yyyy();
l.p = l.p.xxzw(); // r.x => l.y
dl.p = dl.p.upl(ddv[1 - j].p).xyzw(dl.p); // dr.x => dl.y
dl.p = dl.p.insert<0, 1>(ddv[1 - j].p); // dr.x => dl.y
l += dl * dy;
@ -417,13 +416,15 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
GSVertexSW* RESTRICT e = &m_edge.buff[m_edge.count];
GSVector4 scissor = m_fscissor.xzxz();
while(1)
{
if(IsOneOfMyScanlines(top))
{
GSVector4 lrf = l.p.ceil();
GSVector4 lrmax = lrf.max(m_fscissor.xzxz());
GSVector4 lrmin = lrf.min(m_fscissor.xzxz());
GSVector4 lrmax = lrf.max(scissor);
GSVector4 lrmin = lrf.min(scissor);
GSVector4i lr = GSVector4i(lrmax.xxyy(lrmin));
int left = lr.extract32<0>();
@ -435,11 +436,9 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& l, const
{
m_stats.pixels += pixels;
*e = l + dscan * (lrmax - l.p).xxxx();
GSVector4 prestep = lrmax - l.p;
e->p.i16[0] = (int16)left;
e->p.i16[1] = (int16)top;
e->p.i16[2] = (int16)pixels;
AddScanline(e, pixels, left, top, l + dscan * prestep.xxxx());
e++;
}
@ -496,8 +495,12 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertices)
dedge.t = (dv.t / dv.p.yyyy()).xyxy(zero).wyww();
dscan.t = (dv.t / dv.p.xxxx()).xyxy(zero).xwww();
if(scan.p.y < (float)r.top) scan.t += dedge.t * ((float)r.top - scan.p.y);
if(scan.p.x < (float)r.left) scan.t += dscan.t * ((float)r.left - scan.p.x);
GSVector4 prestep = GSVector4(r.left, r.top) - scan.p;
int m = (prestep == GSVector4::zero()).mask();
if((m & 2) == 0) scan.t += dedge.t * prestep.yyyy();
if((m & 1) == 0) scan.t += dscan.t * prestep.xxxx();
m_ds->SetupPrim(v, dscan);
@ -581,16 +584,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
{
m_stats.pixels++;
*e = edge;
AddScanline(e, 1, xi, top, edge);
e->t.u32[3] = (0x10000 - xf) & 0xffff;
e->p.i16[0] = (int16)xi;
e->p.i16[1] = (int16)top;
e->p.i16[2] = 1;
e++;
}
@ -609,16 +606,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.left <= xi && xi < m_scissor.right && IsOneOfMyScanlines(xi))
{
m_stats.pixels++;
*e = edge;
AddScanline(e, 1, xi, top, edge);
e->t.u32[3] = xf;
e->p.i16[0] = (int16)xi;
e->p.i16[1] = (int16)top;
e->p.i16[2] = 1;
e++;
}
@ -678,16 +669,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{
m_stats.pixels++;
*e = edge;
AddScanline(e, 1, left, yi, edge);
e->t.u32[3] = (0x10000 - yf) & 0xffff;
e->p.i16[0] = (int16)left;
e->p.i16[1] = (int16)yi;
e->p.i16[2] = 1;
e++;
}
@ -706,16 +691,10 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
if(m_scissor.top <= yi && yi < m_scissor.bottom && IsOneOfMyScanlines(yi))
{
m_stats.pixels++;
*e = edge;
AddScanline(e, 1, left, yi, edge);
e->t.u32[3] = yf;
e->p.i16[0] = (int16)left;
e->p.i16[1] = (int16)yi;
e->p.i16[2] = 1;
e++;
}
@ -727,7 +706,20 @@ void GSRasterizer::DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GS
}
}
m_edge.count += e - &m_edge.buff[m_edge.count];
int count = e - &m_edge.buff[m_edge.count];
m_stats.pixels += count;
m_edge.count += count;
}
// Fills the edge-buffer slot `e` with one pending scanline: the vertex data of
// `scan`, with the first three 16-bit lanes of p replaced by the span header
// (pixel count, left x, top y). Flush reads those three lanes back in the same order.
void GSRasterizer::AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan)
{
	GSVertexSW& slot = *e;

	// The full copy has to happen first; the header writes below overwrite part of p.
	slot = scan;

	slot.p.i16[0] = static_cast<int16>(pixels);
	slot.p.i16[1] = static_cast<int16>(left);
	slot.p.i16[2] = static_cast<int16>(top);
}
void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge)
@ -741,18 +733,31 @@ void GSRasterizer::Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bo
m_ds->SetupPrim(vertices, dscan);
const GSVertexSW* RESTRICT e = m_edge.buff;
int i = 0;
const GSVertexSW* RESTRICT ee = e + count;
if(!edge)
{
do {m_ds->DrawScanline(e[i].p.i16[2], e[i].p.i16[0], e[i].p.i16[1], e[i]);}
while(++i < count);
do
{
int pixels = e->p.i16[0];
int left = e->p.i16[1];
int top = e->p.i16[2];
m_ds->DrawScanline(pixels, left, top, *e++);
}
while(e < ee);
}
else
{
do {m_ds->DrawEdge(e[i].p.i16[2], e[i].p.i16[0], e[i].p.i16[1], e[i]);}
while(++i < count);
do
{
int pixels = e->p.i16[0];
int left = e->p.i16[1];
int top = e->p.i16[2];
m_ds->DrawEdge(pixels, left, top, *e++);
}
while(e < ee);
}
m_edge.count = 0;

View File

@ -31,11 +31,14 @@ __aligned(class, 32) GSRasterizerData
{
public:
GSVector4i scissor;
bool scissor_test;
GS_PRIM_CLASS primclass;
const GSVertexSW* vertices;
int count;
uint64 frame;
const void* param;
GSRasterizerData() : scissor_test(true) {}
};
class IDrawScanline : public GSAlignedClass<32>
@ -60,8 +63,8 @@ public:
virtual void PrintStats() = 0;
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
__forceinline void DrawScanline(int right, int left, int top, const GSVertexSW& scan) {m_ds(right, left, top, scan);}
__forceinline void DrawEdge(int right, int left, int top, const GSVertexSW& scan) {m_de(right, left, top, scan);}
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
__forceinline bool IsEdge() const {return m_de != NULL;}
@ -90,18 +93,20 @@ protected:
GSVector4 m_fscissor;
struct {GSVertexSW* buff; int count;} m_edge;
void DrawPoint(const GSVertexSW* v);
typedef void (GSRasterizer::*DrawPrimPtr)(const GSVertexSW* v, int count);
template<bool scissor_test>
void DrawPoint(const GSVertexSW* v, int count);
void DrawLine(const GSVertexSW* v);
void DrawTriangle(const GSVertexSW* v);
void DrawSprite(const GSVertexSW* v);
void DrawEdge(const GSVertexSW* v);
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& l, const GSVertexSW& dl, const GSVertexSW& dscan);
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
__forceinline bool IsOneOfMyScanlines(int scanline) const;
__forceinline void AddScanline(GSVertexSW* e, int pixels, int left, int top, const GSVertexSW& scan);
__forceinline void Flush(const GSVertexSW* vertices, const GSVertexSW& dscan, bool edge = false);
public:

View File

@ -72,7 +72,8 @@ void GSRendererSW::VSync(int field)
m_reset = false;
}
// if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
//
if((m_perfmon.GetFrame() & 255) == 0) m_rl.PrintStats();
}
void GSRendererSW::ResetDevice()
@ -171,18 +172,21 @@ void GSRendererSW::Draw()
s_n++;
}
GSVector4i scissor(m_context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p));
GSVector4i r = bbox.rintersect(scissor);
GSRasterizerData data;
data.scissor = GSVector4i(m_context->scissor.in);
data.scissor.z = min(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.scissor = scissor;
data.scissor.z = std::min<int>(data.scissor.z, (int)m_context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
data.scissor_test = !bbox.eq(r);
data.primclass = m_vt.m_primclass;
data.vertices = m_vertices;
data.count = m_count;
data.frame = m_perfmon.GetFrame();
data.param = &gd;
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(data.scissor);
m_rl.Draw(&data, r.width(), r.height());
if(gd.sel.fwrite)
@ -364,8 +368,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// 100 l round
// 101 l tri
// TODO: (int)m_vt.m_lod.x >= mxl => LCM == 1
if(m_vt.m_lod.x > 0)
{
gd.sel.ltf = context->TEX1.MMIN >> 2;
@ -383,13 +385,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
if(gd.sel.mmin == 2)
{
mxl--;
mxl--; // don't sample beyond the last level (TODO: add a dummy level instead?)
}
gd.mxl = GSVector4((float)mxl);
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
gd.k = GSVector4((float)k);
if(gd.sel.fst)
{
ASSERT(gd.sel.lcm == 1);
@ -398,6 +396,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
gd.sel.lcm = 1;
}
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
{
k = (int)m_vt.m_lod.x << 16;
gd.sel.lcm = 1;
}
if(gd.sel.lcm)
{
int lod = std::max<int>(std::min<int>(k, mxl), 0);
@ -412,6 +417,12 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
// TODO: lot to optimize when lod is constant
}
else
{
gd.mxl = GSVector4((float)mxl);
gd.l = GSVector4((float)(-0x10000 << context->TEX1.L));
gd.k = GSVector4((float)k);
}
GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
@ -486,8 +497,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
}
else
{
// TODO: these shortcuts are not compatible with mipmapping, yet
if(gd.sel.fst == 0)
{
// skip per pixel division if q is constant
@ -507,8 +516,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
v[i].t *= w;
}
}
// TODO: q is now destroyed, but since q is constant we should be able to pre-calc gd.lod and change LCM to 1
}
else if(primclass == GS_SPRITE_CLASS)
{
@ -521,8 +528,6 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
v[i + 0].t *= w;
v[i + 1].t *= w;
}
// TODO: preserve q, or if there only one sprite then see the comment above
}
}

View File

@ -66,6 +66,20 @@ public:
void Set() {SetEvent(m_hEvent);}
bool Wait() {return WaitForSingleObject(m_hEvent, INFINITE) == WAIT_OBJECT_0;}
};
/*
class GSAutoResetEvent
{
protected:
long m_sync;
public:
GSAutoResetEvent() {m_sync = 0;}
~GSAutoResetEvent() {}
void Set() {_interlockedbittestandset(&m_sync, 0);}
bool Wait() {while(!_interlockedbittestandreset(&m_sync, 0)) _mm_pause(); return true;}
};
*/
#else

View File

@ -26,7 +26,8 @@ const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
const GSVector4 GSVector4::m_half(0.5f);
const GSVector4 GSVector4::m_one(1.0f);
const GSVector4 GSVector4::m_x3f800000(_mm_castsi128_ps(_mm_set1_epi32(0x3f800000)));
const GSVector4 GSVector4::m_two(2.0f);
const GSVector4 GSVector4::m_four(4.0f);
const GSVector4 GSVector4::m_x4b000000(_mm_castsi128_ps(_mm_set1_epi32(0x4b000000)));
GSVector4i GSVector4i::fit(int arx, int ary) const

View File

@ -2330,8 +2330,8 @@ public:
static const GSVector4 m_ps4567;
static const GSVector4 m_half;
static const GSVector4 m_one;
static const GSVector4 m_x3f800000;
static const GSVector4 m_two;
static const GSVector4 m_four;
static const GSVector4 m_x4b000000;
__forceinline GSVector4()
@ -2462,12 +2462,12 @@ public:
if((mode & 7) == (Round_NegInf & 7))
{
return b - ((a < b) & m_x3f800000);
return b - ((a < b) & m_one);
}
if((mode & 7) == (Round_PosInf & 7))
{
return b + ((a > b) & m_x3f800000);
return b + ((a > b) & m_one);
}
ASSERT((mode & 7) == (Round_NearestInt & 7)); // other modes aren't implemented
@ -2702,7 +2702,66 @@ public:
#endif
}
// TODO: insert
// Returns a vector built from *this and v in which lane `dst` comes from lane
// `src` of v. DrawTriangle uses insert<0, 1>(r) for "r.x => l.y".
//
// Both indices are template constants in the range 0..3; any other value hits
// __assume(0).
//
// NOTE(review): the per-case comments below assume the two-operand swizzles
// a.xyzw(b) take their first two lanes from a and their last two lanes from b
// (the way _mm_shuffle_ps does) - confirm against the swizzle definitions in
// GSVector4. Under that assumption the remaining three lanes are those of *this.
template<int src, int dst> __forceinline GSVector4 insert(const GSVector4& v) const
{
#if 0 // _M_SSE >= 0x401
// Disabled SSE4.1 path, kept for reference.
// NOTE: it's faster with shuffles...
return GSVector4(_mm_insert_ps(m, v.m, _MM_MK_INSERTPS_NDX(src, dst, 0)));
#else
// Two-shuffle emulation, selected by the destination lane first and the source lane second.
switch(dst)
{
case 0:
// Stage 1: (v[src], v[?], this.y, this.y). Stage 2: lanes x,z of that + this.z, this.w.
switch(src)
{
case 0: return v.xxyy(*this).xzzw(*this);
case 1: return v.yyyy(*this).xzzw(*this);
case 2: return v.zzyy(*this).xzzw(*this);
case 3: return v.wwyy(*this).xzzw(*this);
default: __assume(0);
}
break;
case 1:
// Stage 1: (v[src], v[?], this.x, this.x). Stage 2: lanes z,x of that + this.z, this.w.
switch(src)
{
case 0: return v.xxxx(*this).zxzw(*this);
case 1: return v.yyxx(*this).zxzw(*this);
case 2: return v.zzxx(*this).zxzw(*this);
case 3: return v.wwxx(*this).zxzw(*this);
default: __assume(0);
}
break;
case 2:
// Inner: (v[src], v[src], this.w, this.w). Outer: this.x, this.y + lanes x,z of inner.
switch(src)
{
case 0: return xyxz(v.xxww(*this));
case 1: return xyxz(v.yyww(*this));
case 2: return xyxz(v.zzww(*this));
case 3: return xyxz(v.wwww(*this));
default: __assume(0);
}
break;
case 3:
// Inner: (v[src], v[src], this.z, this.z). Outer: this.x, this.y + lanes z,x of inner.
switch(src)
{
case 0: return xyzx(v.xxzz(*this));
case 1: return xyzx(v.yyzz(*this));
case 2: return xyzx(v.zzzz(*this));
case 3: return xyzx(v.wwzz(*this));
default: __assume(0);
}
break;
default:
__assume(0);
}
#endif
// Not reached for valid src/dst: every case above returns.
// Presumably kept so that all control paths visibly return a value - TODO confirm.
return *this;
}
template<int i> __forceinline int extract() const
{