Minor changes to GSdx and fixed the SSE2 compiling error.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@476 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2009-02-11 21:42:10 +00:00
parent 2521bd2f1d
commit 393c8ae102
7 changed files with 174 additions and 60 deletions

View File

@ -166,6 +166,7 @@ void GSDrawScanline::BeginDraw(const GSRasterizerData* data, Functions* f)
m_env.t.min = m_env.t.min.xxxxlh(); m_env.t.min = m_env.t.min.xxxxlh();
m_env.t.max = m_env.t.max.xxxxlh(); m_env.t.max = m_env.t.max.xxxxlh();
m_env.t.mask = m_env.t.mask.xxzz(); m_env.t.mask = m_env.t.mask.xxzz();
m_env.t.invmask = ~m_env.t.mask;
} }
// //

View File

@ -164,7 +164,7 @@ L("@@");
// esi = fzbr // esi = fzbr
// edi = fzbc // edi = fzbc
// ebp = za // ebp = za
//xmm2 = fd // xmm2 = fd
// xmm3 = fm // xmm3 = fm
// xmm4 = zm // xmm4 = zm
// xmm5 = rb // xmm5 = rb
@ -768,17 +768,17 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
pcmpeqd(xmm1, xmm1); pcmpeqd(xmm1, xmm1);
psrlw(xmm1, 15); psrlw(xmm1, 15);
paddw(xmm3, xmm1); paddw(xmm3, xmm1);
}
// uv0 = Wrap(uv0); // uv0 = Wrap(uv0);
Wrap(xmm2, xmm1);
if(m_env.sel.ltf)
{
// uv1 = Wrap(uv1); // uv1 = Wrap(uv1);
Wrap(xmm3, xmm1); Wrap(xmm2, xmm3);
}
else
{
// uv0 = Wrap(uv0);
Wrap(xmm2);
} }
// xmm2 = uv0 // xmm2 = uv0
@ -983,6 +983,118 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
} }
} }
// Emits the instructions that apply the texture wrap mode to one packed
// coordinate register (uv), modifying it in place.
//
// The instruction sequence is chosen when the code is generated, from
// m_env.sel.wms / m_env.sel.wmt (0: repeat, 1: clamp):
//  - both clamp:  uv = min(max(uv, t.min), t.max) on signed 16-bit lanes
//  - both repeat: uv = (uv & t.min) | t.max
//  - mixed:       both results are computed and merged through t.mask
//
// Registers written besides uv: xmm0, xmm1, xmm4, xmm5 (mixed path only).
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
{
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
// (this overload does not touch xmm6)
if(m_env.sel.wms == m_env.sel.wmt)
{
// Same mode on both axes: operate straight from memory, no temporaries needed.
if(m_env.sel.wms)
{
// clamp
pmaxsw(uv, xmmword[&m_env.t.min]);
pminsw(uv, xmmword[&m_env.t.max]);
}
else
{
// repeat: t.min is used as the AND operand, t.max as the OR operand
pand(uv, xmmword[&m_env.t.min]);
por(uv, xmmword[&m_env.t.max]);
}
}
else
{
// Mixed modes: keep a copy of the input so the repeat result can be
// computed from the unclamped coordinates.
movdqa(xmm1, uv);
// Load min/max once; each is used by both the clamp and the repeat result.
movdqa(xmm4, xmmword[&m_env.t.min]);
movdqa(xmm5, xmmword[&m_env.t.max]);
// GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max);
pmaxsw(uv, xmm4);
pminsw(uv, xmm5);
// GSVector4i repeat = (t & m_env.t.min) | m_env.t.max;
pand(xmm1, xmm4);
por(xmm1, xmm5);
// clamp.blend8(repeat, m_env.t.mask);
// NOTE(review): blend8() is defined elsewhere; it presumably takes its
// selection mask implicitly from xmm0, which is why the mask is loaded
// there first - confirm against the helper.
movdqa(xmm0, xmmword[&m_env.t.mask]);
blend8(uv, xmm1);
}
}
// Emits the instructions that apply the texture wrap mode to two packed
// coordinate registers (uv0 and uv1), modifying both in place.
//
// Performs the same operation as the single-register overload, but loads
// t.min / t.max (and the blend mask) once and reuses them for both registers.
//
// The instruction sequence is chosen when the code is generated, from
// m_env.sel.wms / m_env.sel.wmt (0: repeat, 1: clamp) and, for the mixed
// case, from whether m_cpu reports SSE4.1.
//
// Registers written besides uv0/uv1: xmm4, xmm5 always; xmm0, xmm1, xmm6
// additionally on the mixed path.
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
{
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
if(m_env.sel.wms == m_env.sel.wmt)
{
// Same mode on both axes: load the two constants once for both registers.
movdqa(xmm4, xmmword[&m_env.t.min]);
movdqa(xmm5, xmmword[&m_env.t.max]);
if(m_env.sel.wms)
{
// clamp: min(max(uv, t.min), t.max) on signed 16-bit lanes
pmaxsw(uv0, xmm4);
pminsw(uv0, xmm5);
pmaxsw(uv1, xmm4);
pminsw(uv1, xmm5);
}
else
{
// repeat: (uv & t.min) | t.max
pand(uv0, xmm4);
por(uv0, xmm5);
pand(uv1, xmm4);
por(uv1, xmm5);
}
}
else
{
// Mixed modes: keep copies of the inputs so the repeat results can be
// computed from the unclamped coordinates (xmm1 <- uv0, xmm6 <- uv1).
movdqa(xmm1, uv0);
movdqa(xmm6, uv1);
movdqa(xmm4, xmmword[&m_env.t.min]);
movdqa(xmm5, xmmword[&m_env.t.max]);
// GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max);
pmaxsw(uv0, xmm4);
pminsw(uv0, xmm5);
pmaxsw(uv1, xmm4);
pminsw(uv1, xmm5);
// GSVector4i repeat = (t & m_env.t.min) | m_env.t.max;
pand(xmm1, xmm4);
por(xmm1, xmm5);
pand(xmm6, xmm4);
por(xmm6, xmm5);
// clamp.blend8(repeat, m_env.t.mask);
if(m_cpu.has(util::Cpu::tSSE41))
{
// pblendvb reads its byte-select mask implicitly from xmm0, so a
// single mask load serves both blends.
movdqa(xmm0, xmmword[&m_env.t.mask]);
pblendvb(uv0, xmm1);
pblendvb(uv1, xmm6);
}
else
{
// No pblendvb: build (clamp & invmask) | (~invmask & repeat) by hand.
// pandn complements its destination before ANDing, so loading the
// inverted mask lets it pick the repeat lanes. pandn destroys its
// destination, hence a second copy of invmask in xmm4 for uv1
// (t.min in xmm4 is no longer needed at this point).
movdqa(xmm0, xmmword[&m_env.t.invmask]);
movdqa(xmm4, xmm0);
pand(uv0, xmm0);
pandn(xmm0, xmm1);
por(uv0, xmm0);
pand(uv1, xmm4);
pandn(xmm4, xmm6);
por(uv1, xmm4);
}
}
}
void GSDrawScanlineCodeGenerator::AlphaTFX() void GSDrawScanlineCodeGenerator::AlphaTFX()
{ {
if(!m_env.sel.fb) if(!m_env.sel.fb)
@ -1288,9 +1400,32 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
// test |= ((fd [<< 16]) ^ m_env.datm).sra32(31); // test |= ((fd [<< 16]) ^ m_env.datm).sra32(31);
movdqa(xmm1, xmm2); movdqa(xmm1, xmm2);
if(m_env.sel.fpsm == 2) pslld(xmm1, 16);
pxor(xmm1, xmmword[&m_env.datm]); if(m_env.sel.datm)
psrad(xmm1, 31); {
if(m_env.sel.fpsm == 2)
{
pxor(xmm0, xmm0);
psrld(xmm1, 15);
pcmpeqd(xmm1, xmm0);
}
else
{
pcmpeqd(xmm0, xmm0);
pxor(xmm1, xmm0);
psrad(xmm1, 31);
}
}
else
{
if(m_env.sel.fpsm == 2)
{
pslld(xmm1, 16);
}
psrad(xmm1, 31);
}
por(xmm7, xmm1); por(xmm7, xmm1);
alltrue(xmm7, eax, "step"); alltrue(xmm7, eax, "step");
@ -1826,42 +1961,6 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, con
else pinsrd(dst, src, i); else pinsrd(dst, src, i);
} }
// Emits the instructions that apply the texture wrap mode to one packed
// coordinate register (uv), modifying it in place. temp is a caller-supplied
// scratch register, written only on the mixed-mode path.
//
// The instruction sequence is chosen when the code is generated, from
// m_env.sel.wms / m_env.sel.wmt (0: repeat, 1: clamp):
//  - both clamp:  uv = min(max(uv, t.min), t.max) on signed 16-bit lanes
//  - both repeat: uv = (uv & t.min) | t.max
//  - mixed:       both results are computed and merged through t.mask
//
// All constants are read directly from memory operands; xmm0 is overwritten
// on the mixed-mode path.
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv, const Xmm& temp)
{
if(m_env.sel.wms == m_env.sel.wmt)
{
if(m_env.sel.wms)
{
// clamp
pmaxsw(uv, xmmword[&m_env.t.min]);
pminsw(uv, xmmword[&m_env.t.max]);
}
else
{
// repeat: t.min is used as the AND operand, t.max as the OR operand
pand(uv, xmmword[&m_env.t.min]);
por(uv, xmmword[&m_env.t.max]);
}
}
else
{
// Keep the unclamped coordinates for the repeat result.
movdqa(temp, uv);
// GSVector4i clamp = t.sat_i16(m_env.t.min, m_env.t.max);
pmaxsw(uv, xmmword[&m_env.t.min]);
pminsw(uv, xmmword[&m_env.t.max]);
// GSVector4i repeat = (t & m_env.t.min) | m_env.t.max;
pand(temp, xmmword[&m_env.t.min]);
por(temp, xmmword[&m_env.t.max]);
// clamp.blend8(repeat, m_env.t.mask);
// NOTE(review): blend8() is defined elsewhere; it presumably takes its
// selection mask implicitly from xmm0 - confirm against the helper.
movdqa(xmm0, xmmword[&m_env.t.mask]);
blend8(uv, temp);
}
}
template<int shift> template<int shift>
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f) void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f)
{ {
@ -1871,7 +1970,7 @@ void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f)
} }
else else
{ {
pslld(a, shift + 1); psllw(a, shift + 1);
pmulhw(a, f); pmulhw(a, f);
} }
} }

View File

@ -43,6 +43,8 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void Step(); void Step();
void TestZ(const Xmm& temp1, const Xmm& temp2); void TestZ(const Xmm& temp1, const Xmm& temp2);
void SampleTexture(); void SampleTexture();
void Wrap(const Xmm& uv0);
void Wrap(const Xmm& uv0, const Xmm& uv1);
void AlphaTFX(); void AlphaTFX();
void TestAlpha(); void TestAlpha();
void ColorTFX(); void ColorTFX();
@ -57,8 +59,7 @@ class GSDrawScanlineCodeGenerator : public CodeGenerator
void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm); void WritePixel(const Xmm& src, const Xmm& temp, const Reg32& addr, uint8 i, int psm);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2); void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, const Xmm& temp1, const Xmm& temp2);
void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i); void ReadTexel(const Xmm& dst, const Xmm& addr, const Reg32& base, uint8 i);
void Wrap(const Xmm& uv, const Xmm& temp);
template<int shift> void modulate16(const Xmm& a, const Operand& f); template<int shift> void modulate16(const Xmm& a, const Operand& f);
template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f); template<int shift> void lerp16(const Xmm& a, const Xmm& b, const Xmm& f);
void mix16(const Xmm& a, const Xmm& b, const Xmm& temp); void mix16(const Xmm& a, const Xmm& b, const Xmm& temp);

View File

@ -446,6 +446,7 @@ protected:
if(context->FRAME.PSM != PSM_PSMCT24) if(context->FRAME.PSM != PSM_PSMCT24)
{ {
p.sel.date = context->TEST.DATE; p.sel.date = context->TEST.DATE;
p.sel.datm = context->TEST.DATM;
} }
if(PRIM->ABE) if(PRIM->ABE)

View File

@ -55,9 +55,10 @@ union GSScanlineSelector
DWORD ztest:1; // 35 DWORD ztest:1; // 35
DWORD wms:1; // 36 (0: repeat, 1: clamp) DWORD wms:1; // 36 (0: repeat, 1: clamp)
DWORD wmt:1; // 37 DWORD wmt:1; // 37
DWORD colclamp:1; // 38 DWORD datm:1; // 38
DWORD fba:1; // 39 DWORD colclamp:1; // 39
DWORD dthe:1; // 40 DWORD fba:1; // 40
DWORD dthe:1; // 41
}; };
struct struct
@ -128,7 +129,7 @@ __declspec(align(16)) struct GSScanlineEnvironment
GSVector4i* dimx; GSVector4i* dimx;
GSVector4i fm, zm; GSVector4i fm, zm;
struct {GSVector4i min, max, mask;} t; // [u] x 4 [v] x 4 struct {GSVector4i min, max, mask, invmask;} t; // [u] x 4 [v] x 4
GSVector4i datm; GSVector4i datm;
GSVector4i colclamp; GSVector4i colclamp;
GSVector4i fba; GSVector4i fba;

View File

@ -327,7 +327,15 @@ public:
GSVector4i blend8(const GSVector4i& a, const GSVector4i& mask) const GSVector4i blend8(const GSVector4i& a, const GSVector4i& mask) const
{ {
#if _M_SSE >= 0x401
return GSVector4i(_mm_blendv_epi8(m, a, mask)); return GSVector4i(_mm_blendv_epi8(m, a, mask));
#else
return GSVector4i(_mm_or_si128(_mm_andnot_si128(mask, m), _mm_and_si128(mask, a)));
#endif
} }
#if _M_SSE >= 0x401 #if _M_SSE >= 0x401
@ -2310,7 +2318,15 @@ public:
GSVector4 blend8(const GSVector4& a, const GSVector4& mask) const GSVector4 blend8(const GSVector4& a, const GSVector4& mask) const
{ {
#if _M_SSE >= 0x401
return GSVector4(_mm_blendv_ps(m, a, mask)); return GSVector4(_mm_blendv_ps(m, a, mask));
#else
return GSVector4(_mm_or_ps(_mm_andnot_ps(mask, m), _mm_and_ps(mask, a)));
#endif
} }
GSVector4 upl(const GSVector4& a) const GSVector4 upl(const GSVector4& a) const

View File

@ -103,11 +103,6 @@
#else #else
// not an equal replacement for sse4's blend but for our needs it is ok
#define _mm_blendv_ps(a, b, mask) _mm_or_ps(_mm_andnot_ps(mask, a), _mm_and_ps(mask, b))
#define _mm_blendv_epi8(a, b, mask) _mm_or_si128(_mm_andnot_si128(mask, a), _mm_and_si128(mask, b))
__forceinline __m128 _mm_round_ps(__m128 x) __forceinline __m128 _mm_round_ps(__m128 x)
{ {
__m128 t = _mm_or_ps(_mm_and_ps(ps_80000000, x), ps_4b000000); __m128 t = _mm_or_ps(_mm_and_ps(ps_80000000, x), ps_4b000000);