mirror of https://github.com/PCSX2/pcsx2.git
gsdx sw JIT: dynamically select between AVX1 and SSE code path (scanline)
This commit is contained in:
parent
6b78b8f9ce
commit
574a2c774e
|
@ -22,6 +22,17 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
#else
|
||||
void GSDrawScanlineCodeGenerator::Generate()
|
||||
{
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
Generate_AVX();
|
||||
else
|
||||
Generate_SSE();
|
||||
}
|
||||
#endif
|
||||
|
||||
#if _M_SSE >= 0x501
|
||||
|
||||
alignas(8) const uint8 GSDrawScanlineCodeGenerator::m_test[16][8] =
|
||||
|
@ -183,194 +194,179 @@ void GSDrawScanlineCodeGenerator::blend8r(const Ymm& b, const Ymm& a)
|
|||
|
||||
void GSDrawScanlineCodeGenerator::modulate16(const Xmm& a, const Operand& f, int shift)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
if(shift == 0)
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpmulhrsw(a, f);
|
||||
if(shift == 0)
|
||||
{
|
||||
vpmulhrsw(a, f);
|
||||
}
|
||||
else
|
||||
{
|
||||
vpsllw(a, shift + 1);
|
||||
vpmulhw(a, f);
|
||||
}
|
||||
|
||||
}
|
||||
else
|
||||
{
|
||||
vpsllw(a, shift + 1);
|
||||
vpmulhw(a, f);
|
||||
if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3))
|
||||
{
|
||||
pmulhrsw(a, f);
|
||||
}
|
||||
else
|
||||
{
|
||||
psllw(a, shift + 1);
|
||||
pmulhw(a, f);
|
||||
}
|
||||
}
|
||||
|
||||
#else
|
||||
|
||||
if(shift == 0 && m_cpu.has(util::Cpu::tSSSE3))
|
||||
{
|
||||
pmulhrsw(a, f);
|
||||
}
|
||||
else
|
||||
{
|
||||
psllw(a, shift + 1);
|
||||
pmulhw(a, f);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpsubw(a, b);
|
||||
modulate16(a, f, shift);
|
||||
vpaddw(a, b);
|
||||
|
||||
#else
|
||||
|
||||
psubw(a, b);
|
||||
modulate16(a, f, shift);
|
||||
paddw(a, b);
|
||||
|
||||
#endif
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpsubw(a, b);
|
||||
modulate16(a, f, shift);
|
||||
vpaddw(a, b);
|
||||
}
|
||||
else
|
||||
{
|
||||
psubw(a, b);
|
||||
modulate16(a, f, shift);
|
||||
paddw(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::lerp16_4(const Xmm& a, const Xmm& b, const Xmm& f)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpsubw(a, b);
|
||||
vpmullw(a, f);
|
||||
vpsraw(a, 4);
|
||||
vpaddw(a, b);
|
||||
|
||||
#else
|
||||
|
||||
psubw(a, b);
|
||||
pmullw(a, f);
|
||||
psraw(a, 4);
|
||||
paddw(a, b);
|
||||
|
||||
#endif
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpsubw(a, b);
|
||||
vpmullw(a, f);
|
||||
vpsraw(a, 4);
|
||||
vpaddw(a, b);
|
||||
}
|
||||
else
|
||||
{
|
||||
psubw(a, b);
|
||||
pmullw(a, f);
|
||||
psraw(a, 4);
|
||||
paddw(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::mix16(const Xmm& a, const Xmm& b, const Xmm& temp)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpblendw(a, b, 0xaa);
|
||||
|
||||
#else
|
||||
|
||||
if(g_cpu.has(util::Cpu::tSSE41))
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
pblendw(a, b, 0xaa);
|
||||
vpblendw(a, b, 0xaa);
|
||||
}
|
||||
else
|
||||
{
|
||||
pcmpeqd(temp, temp);
|
||||
psrld(temp, 16);
|
||||
pand(a, temp);
|
||||
pandn(temp, b);
|
||||
por(a, temp);
|
||||
if(g_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pblendw(a, b, 0xaa);
|
||||
}
|
||||
else
|
||||
{
|
||||
pcmpeqd(temp, temp);
|
||||
psrld(temp, 16);
|
||||
pand(a, temp);
|
||||
pandn(temp, b);
|
||||
por(a, temp);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::clamp16(const Xmm& a, const Xmm& temp)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpackuswb(a, a);
|
||||
vpmovzxbw(a, a);
|
||||
|
||||
#else
|
||||
|
||||
if(g_cpu.has(util::Cpu::tSSE41))
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
packuswb(a, a);
|
||||
pmovzxbw(a, a);
|
||||
vpackuswb(a, a);
|
||||
vpmovzxbw(a, a);
|
||||
}
|
||||
else
|
||||
{
|
||||
packuswb(a, a);
|
||||
pxor(temp, temp);
|
||||
punpcklbw(a, temp);
|
||||
if(g_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
packuswb(a, a);
|
||||
pmovzxbw(a, a);
|
||||
}
|
||||
else
|
||||
{
|
||||
packuswb(a, a);
|
||||
pxor(temp, temp);
|
||||
punpcklbw(a, temp);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::alltrue()
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
je("step", T_NEAR);
|
||||
|
||||
#else
|
||||
|
||||
pmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
je("step", T_NEAR);
|
||||
|
||||
#endif
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
je("step", T_NEAR);
|
||||
}
|
||||
else
|
||||
{
|
||||
pmovmskb(eax, xmm7);
|
||||
cmp(eax, 0xffff);
|
||||
je("step", T_NEAR);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend(const Xmm& a, const Xmm& b, const Xmm& mask)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpand(b, mask);
|
||||
vpandn(mask, a);
|
||||
vpor(a, b, mask);
|
||||
|
||||
#else
|
||||
|
||||
pand(b, mask);
|
||||
pandn(mask, a);
|
||||
por(b, mask);
|
||||
movdqa(a, b);
|
||||
|
||||
#endif
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpand(b, mask);
|
||||
vpandn(mask, a);
|
||||
vpor(a, b, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
pand(b, mask);
|
||||
pandn(mask, a);
|
||||
por(b, mask);
|
||||
movdqa(a, b);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blendr(const Xmm& b, const Xmm& a, const Xmm& mask)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpand(b, mask);
|
||||
vpandn(mask, a);
|
||||
vpor(b, mask);
|
||||
|
||||
#else
|
||||
|
||||
pand(b, mask);
|
||||
pandn(mask, a);
|
||||
por(b, mask);
|
||||
|
||||
#endif
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpand(b, mask);
|
||||
vpandn(mask, a);
|
||||
vpor(b, mask);
|
||||
}
|
||||
else
|
||||
{
|
||||
pand(b, mask);
|
||||
pandn(mask, a);
|
||||
por(b, mask);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend8(const Xmm& a, const Xmm& b)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpblendvb(a, a, b, xmm0);
|
||||
|
||||
#else
|
||||
|
||||
if(g_cpu.has(util::Cpu::tSSE41))
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
vpblendvb(a, a, b, xmm0);
|
||||
else if(g_cpu.has(util::Cpu::tSSE41))
|
||||
pblendvb(a, b);
|
||||
else
|
||||
blend(a, b, xmm0);
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
|
||||
{
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
vpblendvb(b, a, b, xmm0);
|
||||
|
||||
#else
|
||||
|
||||
if(g_cpu.has(util::Cpu::tSSE41))
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
vpblendvb(b, a, b, xmm0);
|
||||
}
|
||||
else if(g_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pblendvb(a, b);
|
||||
movdqa(b, a);
|
||||
|
@ -379,8 +375,6 @@ void GSDrawScanlineCodeGenerator::blend8r(const Xmm& b, const Xmm& a)
|
|||
{
|
||||
blendr(b, a, xmm0);
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const Xmm& src)
|
||||
|
@ -388,31 +382,34 @@ void GSDrawScanlineCodeGenerator::split16_2x8(const Xmm& l, const Xmm& h, const
|
|||
// l = src & 0xFF; (1 left shift + 1 right shift)
|
||||
// h = (src >> 8) & 0xFF; (1 right shift)
|
||||
|
||||
#if _M_SSE >= 0x500
|
||||
if (src == h) {
|
||||
vpsllw(l, src, 8);
|
||||
vpsrlw(h, 8);
|
||||
} else if (src == l) {
|
||||
vpsrlw(h, src, 8);
|
||||
vpsllw(l, 8);
|
||||
} else {
|
||||
vpsllw(l, src, 8);
|
||||
vpsrlw(h, src, 8);
|
||||
if(g_cpu.has(util::Cpu::tAVX))
|
||||
{
|
||||
if (src == h) {
|
||||
vpsllw(l, src, 8);
|
||||
vpsrlw(h, 8);
|
||||
} else if (src == l) {
|
||||
vpsrlw(h, src, 8);
|
||||
vpsllw(l, 8);
|
||||
} else {
|
||||
vpsllw(l, src, 8);
|
||||
vpsrlw(h, src, 8);
|
||||
}
|
||||
vpsrlw(l, 8);
|
||||
}
|
||||
vpsrlw(l, 8);
|
||||
#else
|
||||
if (src == h) {
|
||||
movdqa(l, src);
|
||||
} else if (src == l) {
|
||||
movdqa(h, src);
|
||||
} else {
|
||||
movdqa(l, src);
|
||||
movdqa(h, src);
|
||||
else
|
||||
{
|
||||
if (src == h) {
|
||||
movdqa(l, src);
|
||||
} else if (src == l) {
|
||||
movdqa(h, src);
|
||||
} else {
|
||||
movdqa(l, src);
|
||||
movdqa(h, src);
|
||||
}
|
||||
psllw(l, 8);
|
||||
psrlw(l, 8);
|
||||
psrlw(h, 8);
|
||||
}
|
||||
psllw(l, 8);
|
||||
psrlw(l, 8);
|
||||
psrlw(h, 8);
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -27,6 +27,12 @@
|
|||
|
||||
using namespace Xbyak;
|
||||
|
||||
// RegLong names the general-purpose register class used for address operands:
// Reg64 when building for a 64-bit target (_M_AMD64 or _WIN64 defined), Reg32
// otherwise. It lets one declaration serve both builds.
#if defined(_M_AMD64) || defined(_WIN64)
|
||||
#define RegLong Reg64
|
||||
#else
|
||||
#define RegLong Reg32
|
||||
#endif
|
||||
|
||||
class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
||||
{
|
||||
void operator = (const GSDrawScanlineCodeGenerator&);
|
||||
|
@ -58,17 +64,9 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
void WriteZBuf();
|
||||
void AlphaBlend();
|
||||
void WriteFrame();
|
||||
|
||||
#if defined(_M_AMD64) || defined(_WIN64)
|
||||
void ReadPixel(const Ymm& dst, const Ymm& temp, const Reg64& addr);
|
||||
void WritePixel(const Ymm& src, const Ymm& temp, const Reg64& addr, const Reg32& mask, bool fast, int psm, int fz);
|
||||
void WritePixel(const Xmm& src, const Reg64& addr, uint8 i, uint8 j, int psm);
|
||||
#else
|
||||
void ReadPixel(const Ymm& dst, const Ymm& temp, const Reg32& addr);
|
||||
void WritePixel(const Ymm& src, const Ymm& temp, const Reg32& addr, const Reg32& mask, bool fast, int psm, int fz);
|
||||
void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, uint8 j, int psm);
|
||||
#endif
|
||||
|
||||
void ReadPixel(const Ymm& dst, const Ymm& temp, const RegLong& addr);
|
||||
void WritePixel(const Ymm& src, const Ymm& temp, const RegLong& addr, const Reg32& mask, bool fast, int psm, int fz);
|
||||
void WritePixel(const Xmm& src, const RegLong& addr, uint8 i, uint8 j, int psm);
|
||||
void ReadTexel(int pixels, int mip_offset = 0);
|
||||
void ReadTexel(const Ymm& dst, const Ymm& addr, uint8 i);
|
||||
|
||||
|
@ -85,39 +83,59 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
|
||||
#else
|
||||
|
||||
void Init();
|
||||
void Step();
|
||||
void TestZ(const Xmm& temp1, const Xmm& temp2);
|
||||
void SampleTexture();
|
||||
void Wrap(const Xmm& uv0);
|
||||
void Wrap(const Xmm& uv0, const Xmm& uv1);
|
||||
void SampleTextureLOD();
|
||||
void WrapLOD(const Xmm& uv0);
|
||||
void WrapLOD(const Xmm& uv0, const Xmm& uv1);
|
||||
void AlphaTFX();
|
||||
void ReadMask();
|
||||
void TestAlpha();
|
||||
void ColorTFX();
|
||||
void Fog();
|
||||
void ReadFrame();
|
||||
void TestDestAlpha();
|
||||
void WriteMask();
|
||||
void WriteZBuf();
|
||||
void AlphaBlend();
|
||||
void WriteFrame();
|
||||
void Generate_SSE();
|
||||
void Init_SSE();
|
||||
void Step_SSE();
|
||||
void TestZ_SSE(const Xmm& temp1, const Xmm& temp2);
|
||||
void SampleTexture_SSE();
|
||||
void Wrap_SSE(const Xmm& uv0);
|
||||
void Wrap_SSE(const Xmm& uv0, const Xmm& uv1);
|
||||
void SampleTextureLOD_SSE();
|
||||
void WrapLOD_SSE(const Xmm& uv0);
|
||||
void WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1);
|
||||
void AlphaTFX_SSE();
|
||||
void ReadMask_SSE();
|
||||
void TestAlpha_SSE();
|
||||
void ColorTFX_SSE();
|
||||
void Fog_SSE();
|
||||
void ReadFrame_SSE();
|
||||
void TestDestAlpha_SSE();
|
||||
void WriteMask_SSE();
|
||||
void WriteZBuf_SSE();
|
||||
void AlphaBlend_SSE();
|
||||
void WriteFrame_SSE();
|
||||
void ReadPixel_SSE(const Xmm& dst, const RegLong& addr);
|
||||
void WritePixel_SSE(const Xmm& src, const RegLong& addr, const Reg8& mask, bool fast, int psm, int fz);
|
||||
void WritePixel_SSE(const Xmm& src, const RegLong& addr, uint8 i, int psm);
|
||||
void ReadTexel_SSE(int pixels, int mip_offset = 0);
|
||||
void ReadTexel_SSE(const Xmm& dst, const Xmm& addr, uint8 i);
|
||||
|
||||
#if defined(_M_AMD64) || defined(_WIN64)
|
||||
void ReadPixel(const Xmm& dst, const Reg64& addr);
|
||||
void WritePixel(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz);
|
||||
void WritePixel(const Xmm& src, const Reg64& addr, uint8 i, int psm);
|
||||
#else
|
||||
void ReadPixel(const Xmm& dst, const Reg32& addr);
|
||||
void WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz);
|
||||
void WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm);
|
||||
#endif
|
||||
|
||||
void ReadTexel(int pixels, int mip_offset = 0);
|
||||
void ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i);
|
||||
void Generate_AVX();
|
||||
void Init_AVX();
|
||||
void Step_AVX();
|
||||
void TestZ_AVX(const Xmm& temp1, const Xmm& temp2);
|
||||
void SampleTexture_AVX();
|
||||
void Wrap_AVX(const Xmm& uv0);
|
||||
void Wrap_AVX(const Xmm& uv0, const Xmm& uv1);
|
||||
void SampleTextureLOD_AVX();
|
||||
void WrapLOD_AVX(const Xmm& uv0);
|
||||
void WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1);
|
||||
void AlphaTFX_AVX();
|
||||
void ReadMask_AVX();
|
||||
void TestAlpha_AVX();
|
||||
void ColorTFX_AVX();
|
||||
void Fog_AVX();
|
||||
void ReadFrame_AVX();
|
||||
void TestDestAlpha_AVX();
|
||||
void WriteMask_AVX();
|
||||
void WriteZBuf_AVX();
|
||||
void AlphaBlend_AVX();
|
||||
void WriteFrame_AVX();
|
||||
void ReadPixel_AVX(const Xmm& dst, const RegLong& addr);
|
||||
void WritePixel_AVX(const Xmm& src, const RegLong& addr, const Reg8& mask, bool fast, int psm, int fz);
|
||||
void WritePixel_AVX(const Xmm& src, const RegLong& addr, uint8 i, int psm);
|
||||
void ReadTexel_AVX(int pixels, int mip_offset = 0);
|
||||
void ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i);
|
||||
|
||||
void modulate16(const Xmm& a, const Operand& f, int shift);
|
||||
void lerp16(const Xmm& a, const Xmm& b, const Xmm& f, int shift);
|
||||
|
|
|
@ -45,7 +45,7 @@
|
|||
#define _zm xmm5
|
||||
#define _fd xmm6
|
||||
|
||||
#if _M_SSE == 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
#if _M_SSE < 0x501 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
#ifdef _WIN64
|
||||
#else
|
||||
|
@ -59,7 +59,7 @@ static const int _rz_zd = -8 * 10;
|
|||
static const int _rz_cov = -8 * 12;
|
||||
#endif
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Generate()
|
||||
void GSDrawScanlineCodeGenerator::Generate_AVX()
|
||||
{
|
||||
bool need_tex = m_sel.fb && m_sel.tfx != TFX_NONE;
|
||||
bool need_clut = need_tex && m_sel.tlu;
|
||||
|
@ -100,7 +100,7 @@ void GSDrawScanlineCodeGenerator::Generate()
|
|||
if(need_tex)
|
||||
mov(_m_local__gd__tex, ptr[_m_local__gd + offsetof(GSScanlineGlobalData, tex)]);
|
||||
|
||||
Init();
|
||||
Init_AVX();
|
||||
|
||||
// a0 = steps
|
||||
// t1 = fza_base
|
||||
|
@ -126,30 +126,30 @@ void GSDrawScanlineCodeGenerator::Generate()
|
|||
|
||||
L("loop");
|
||||
|
||||
TestZ(xmm5, xmm6);
|
||||
TestZ_AVX(xmm5, xmm6);
|
||||
|
||||
// ebp = za
|
||||
|
||||
if(m_sel.mmin)
|
||||
{
|
||||
SampleTextureLOD();
|
||||
SampleTextureLOD_AVX();
|
||||
}
|
||||
else
|
||||
{
|
||||
SampleTexture();
|
||||
SampleTexture_AVX();
|
||||
}
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
// xmm3 = ga
|
||||
|
||||
AlphaTFX();
|
||||
AlphaTFX_AVX();
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
// xmm3 = ga
|
||||
|
||||
ReadMask();
|
||||
ReadMask_AVX();
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
|
@ -157,7 +157,7 @@ L("loop");
|
|||
// xmm4 = fm
|
||||
// xmm5 = zm
|
||||
|
||||
TestAlpha();
|
||||
TestAlpha_AVX();
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
|
@ -165,7 +165,7 @@ L("loop");
|
|||
// xmm4 = fm
|
||||
// xmm5 = zm
|
||||
|
||||
ColorTFX();
|
||||
ColorTFX_AVX();
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
|
@ -173,7 +173,7 @@ L("loop");
|
|||
// xmm4 = fm
|
||||
// xmm5 = zm
|
||||
|
||||
Fog();
|
||||
Fog_AVX();
|
||||
|
||||
// ebp = za
|
||||
// xmm2 = rb
|
||||
|
@ -181,7 +181,7 @@ L("loop");
|
|||
// xmm4 = fm
|
||||
// xmm5 = zm
|
||||
|
||||
ReadFrame();
|
||||
ReadFrame_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// ebp = za
|
||||
|
@ -191,7 +191,7 @@ L("loop");
|
|||
// xmm5 = zm
|
||||
// xmm6 = fd
|
||||
|
||||
TestDestAlpha();
|
||||
TestDestAlpha_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// ebp = za
|
||||
|
@ -201,7 +201,7 @@ L("loop");
|
|||
// xmm5 = zm
|
||||
// xmm6 = fd
|
||||
|
||||
WriteMask();
|
||||
WriteMask_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// edx = fzm
|
||||
|
@ -212,7 +212,7 @@ L("loop");
|
|||
// xmm5 = zm
|
||||
// xmm6 = fd
|
||||
|
||||
WriteZBuf();
|
||||
WriteZBuf_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// edx = fzm
|
||||
|
@ -221,7 +221,7 @@ L("loop");
|
|||
// xmm4 = fm
|
||||
// xmm6 = fd
|
||||
|
||||
AlphaBlend();
|
||||
AlphaBlend_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// edx = fzm
|
||||
|
@ -230,7 +230,7 @@ L("loop");
|
|||
// xmm4 = fm
|
||||
// xmm6 = fd
|
||||
|
||||
WriteFrame();
|
||||
WriteFrame_AVX();
|
||||
|
||||
L("step");
|
||||
|
||||
|
@ -242,7 +242,7 @@ L("step");
|
|||
|
||||
jle("exit", T_NEAR);
|
||||
|
||||
Step();
|
||||
Step_AVX();
|
||||
|
||||
jmp("loop", T_NEAR);
|
||||
}
|
||||
|
@ -277,7 +277,7 @@ L("exit");
|
|||
ret();
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
void GSDrawScanlineCodeGenerator::Init_AVX()
|
||||
{
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
|
@ -480,7 +480,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Step()
|
||||
void GSDrawScanlineCodeGenerator::Step_AVX()
|
||||
{
|
||||
// steps -= 4;
|
||||
|
||||
|
@ -603,7 +603,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
|
||||
{
|
||||
if(!m_sel.zb)
|
||||
{
|
||||
|
@ -661,7 +661,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
if(m_sel.ztest)
|
||||
{
|
||||
ReadPixel(xmm1, rbp);
|
||||
ReadPixel_AVX(xmm1, rbp);
|
||||
|
||||
if(m_sel.zwrite && m_sel.zpsm < 2)
|
||||
{
|
||||
|
@ -715,7 +715,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture_AVX()
|
||||
{
|
||||
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
|
@ -786,13 +786,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
Wrap(xmm4, xmm5);
|
||||
Wrap_AVX(xmm4, xmm5);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
Wrap(xmm4);
|
||||
Wrap_AVX(xmm4);
|
||||
}
|
||||
|
||||
// xmm4 = uv0
|
||||
|
@ -854,7 +854,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 0);
|
||||
ReadTexel_AVX(4, 0);
|
||||
|
||||
// xmm0 = c10
|
||||
// xmm1 = c11
|
||||
|
@ -944,7 +944,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 0);
|
||||
ReadTexel_AVX(1, 0);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -958,7 +958,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// xmm3 = ga
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
||||
void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv)
|
||||
{
|
||||
// xmm0, xmm1, xmm2, xmm3 = free
|
||||
|
||||
|
@ -1019,7 +1019,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
||||
void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1)
|
||||
{
|
||||
// xmm0, xmm1, xmm2, xmm3 = free
|
||||
|
||||
|
@ -1111,19 +1111,19 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
|||
}
|
||||
}
|
||||
|
||||
// Empty in this code path: no instructions are emitted.
// NOTE(review): Generate_AVX() calls this when m_sel.mmin is set, so that case
// produces no texture sampling code here - presumably LOD sampling is not
// implemented for this target; confirm.
void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX()
{
}
|
||||
|
||||
// Single-coordinate LOD wrap. Empty in this code path: no instructions are
// emitted and `uv` is left untouched.
void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv)
{
}
|
||||
|
||||
// Two-coordinate LOD wrap. Empty in this code path: no instructions are
// emitted and `uv0` / `uv1` are left untouched.
void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1)
{
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX_AVX()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
|
@ -1261,7 +1261,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadMask()
|
||||
void GSDrawScanlineCodeGenerator::ReadMask_AVX()
|
||||
{
|
||||
if(m_sel.fwrite)
|
||||
{
|
||||
|
@ -1274,7 +1274,7 @@ void GSDrawScanlineCodeGenerator::ReadMask()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha()
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
|
||||
{
|
||||
switch(m_sel.atst)
|
||||
{
|
||||
|
@ -1345,7 +1345,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX()
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -1410,7 +1410,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Fog()
|
||||
void GSDrawScanlineCodeGenerator::Fog_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite || !m_sel.fge)
|
||||
{
|
||||
|
@ -1431,7 +1431,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
mix16(_ga, xmm6, _f);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame()
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame_AVX()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
|
@ -1449,10 +1449,10 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
|||
return;
|
||||
}
|
||||
|
||||
ReadPixel(_fd, rbx);
|
||||
ReadPixel_AVX(_fd, rbx);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()
|
||||
{
|
||||
if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2)
|
||||
{
|
||||
|
@ -1496,7 +1496,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
alltrue();
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
void GSDrawScanlineCodeGenerator::WriteMask_AVX()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
|
@ -1542,7 +1542,7 @@ void GSDrawScanlineCodeGenerator::WriteMask()
|
|||
not(edx);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf()
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf_AVX()
|
||||
{
|
||||
if(!m_sel.zwrite)
|
||||
{
|
||||
|
@ -1571,10 +1571,10 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, rbp, dh, fast, m_sel.zpsm, 1);
|
||||
WritePixel_AVX(xmm1, rbp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend()
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -1798,7 +1798,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame()
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -1889,16 +1889,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm2, rbx, dl, fast, m_sel.fpsm, 0);
|
||||
WritePixel_AVX(xmm2, rbx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg64& addr)
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel_AVX(const Xmm& dst, const Reg64& addr)
|
||||
{
|
||||
vmovq(dst, qword[_m_local__gd__vm + addr * 2]);
|
||||
vmovhps(dst, qword[_m_local__gd__vm + addr * 2 + 8 * 2]);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg64& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
|
@ -1909,10 +1909,10 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
|
|||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
WritePixel_AVX(src, addr, 0, psm);
|
||||
WritePixel_AVX(src, addr, 1, psm);
|
||||
WritePixel_AVX(src, addr, 2, psm);
|
||||
WritePixel_AVX(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -1943,22 +1943,22 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
|
|||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel_AVX(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel_AVX(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel_AVX(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
WritePixel_AVX(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
|
@ -1966,7 +1966,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
|
|||
|
||||
static const int s_offsets[4] = {0, 2, 8, 10};
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr, uint8 i, int psm)
|
||||
void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg64& addr, uint8 i, int psm)
|
||||
{
|
||||
Address dst = ptr[_m_local__gd__vm + addr * 2 + s_offsets[i] * 2];
|
||||
|
||||
|
@ -1990,7 +1990,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg64& addr,
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset)
|
||||
{
|
||||
const int in[] = {0, 1, 2, 3};
|
||||
const int out[] = {4, 5, 0, 1};
|
||||
|
@ -1999,12 +1999,12 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
{
|
||||
for(int j = 0; j < 4; j++)
|
||||
{
|
||||
ReadTexel(Xmm(out[i]), Xmm(in[i]), j);
|
||||
ReadTexel_AVX(Xmm(out[i]), Xmm(in[i]), j);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
{
|
||||
const Address& src = m_sel.tlu ? ptr[_m_local__gd__clut + rax * 4] : ptr[_m_local__gd__tex + rax * 4];
|
||||
|
||||
|
@ -2026,7 +2026,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uin
|
|||
// And palette need zero masking.
|
||||
// It is not possible to use same source/destination so linear interpolation must be updated
|
||||
#if 0
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset)
|
||||
{
|
||||
const int in[] = {0, 1, 2, 3};
|
||||
const int out[] = {4, 5, 0, 1};
|
||||
|
|
|
@ -23,21 +23,20 @@
|
|||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
#if _M_SSE == 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
#if _M_SSE < 0x501 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
// Byte offsets used by the generated 32-bit code.
// NOTE(review): 16 matches the four 4-byte register pushes (ebx, esi, edi, ebp)
// at the start of Generate_AVX(), so these are presumably esp-relative offsets
// of the incoming arguments - confirm against the uses of _top and _v.
static const int _args = 16;
|
||||
static const int _top = _args + 4;
|
||||
static const int _v = _args + 8;
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Generate()
|
||||
void GSDrawScanlineCodeGenerator::Generate_AVX()
|
||||
{
|
||||
//ret(8);
|
||||
push(ebx);
|
||||
push(esi);
|
||||
push(edi);
|
||||
push(ebp);
|
||||
|
||||
Init();
|
||||
Init_AVX();
|
||||
|
||||
if(!m_sel.edge)
|
||||
{
|
||||
|
@ -59,7 +58,7 @@ L("loop");
|
|||
|
||||
bool tme = m_sel.tfx != TFX_NONE;
|
||||
|
||||
TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);
|
||||
TestZ_AVX(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -75,11 +74,11 @@ L("loop");
|
|||
|
||||
if(m_sel.mmin)
|
||||
{
|
||||
SampleTextureLOD();
|
||||
SampleTextureLOD_AVX();
|
||||
}
|
||||
else
|
||||
{
|
||||
SampleTexture();
|
||||
SampleTexture_AVX();
|
||||
}
|
||||
|
||||
// ecx = steps
|
||||
|
@ -93,7 +92,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
AlphaTFX();
|
||||
AlphaTFX_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -104,7 +103,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
ReadMask();
|
||||
ReadMask_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -117,7 +116,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
TestAlpha();
|
||||
TestAlpha_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -130,7 +129,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
ColorTFX();
|
||||
ColorTFX_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -142,7 +141,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
Fog();
|
||||
Fog_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -154,7 +153,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
ReadFrame();
|
||||
ReadFrame_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -167,7 +166,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
TestDestAlpha();
|
||||
TestDestAlpha_AVX();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -180,7 +179,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
WriteMask();
|
||||
WriteMask_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// ecx = steps
|
||||
|
@ -194,7 +193,7 @@ L("loop");
|
|||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
|
||||
WriteZBuf();
|
||||
WriteZBuf_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// ecx = steps
|
||||
|
@ -208,7 +207,7 @@ L("loop");
|
|||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
|
||||
AlphaBlend();
|
||||
AlphaBlend_AVX();
|
||||
|
||||
// ebx = fa
|
||||
// ecx = steps
|
||||
|
@ -220,7 +219,7 @@ L("loop");
|
|||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
|
||||
WriteFrame();
|
||||
WriteFrame_AVX();
|
||||
|
||||
L("step");
|
||||
|
||||
|
@ -232,7 +231,7 @@ L("step");
|
|||
|
||||
jle("exit", T_NEAR);
|
||||
|
||||
Step();
|
||||
Step_AVX();
|
||||
|
||||
jmp("loop", T_NEAR);
|
||||
}
|
||||
|
@ -249,7 +248,7 @@ L("exit");
|
|||
ret(8);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
void GSDrawScanlineCodeGenerator::Init_AVX()
|
||||
{
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
|
@ -455,7 +454,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Step()
|
||||
void GSDrawScanlineCodeGenerator::Step_AVX()
|
||||
{
|
||||
// steps -= 4;
|
||||
|
||||
|
@ -596,7 +595,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
void GSDrawScanlineCodeGenerator::TestZ_AVX(const Xmm& temp1, const Xmm& temp2)
|
||||
{
|
||||
if(!m_sel.zb)
|
||||
{
|
||||
|
@ -644,7 +643,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
if(m_sel.ztest)
|
||||
{
|
||||
ReadPixel(xmm1, ebp);
|
||||
ReadPixel_AVX(xmm1, ebp);
|
||||
|
||||
if(m_sel.zwrite && m_sel.zpsm < 2)
|
||||
{
|
||||
|
@ -694,7 +693,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture_AVX()
|
||||
{
|
||||
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
|
@ -775,13 +774,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
Wrap(xmm2, xmm3);
|
||||
Wrap_AVX(xmm2, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
Wrap(xmm2);
|
||||
Wrap_AVX(xmm2);
|
||||
}
|
||||
|
||||
// xmm2 = uv0
|
||||
|
@ -843,7 +842,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 0);
|
||||
ReadTexel_AVX(4, 0);
|
||||
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -935,7 +934,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 0);
|
||||
ReadTexel_AVX(1, 0);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -946,7 +945,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
||||
void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv)
|
||||
{
|
||||
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
|
||||
|
||||
|
@ -1007,7 +1006,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
||||
void GSDrawScanlineCodeGenerator::Wrap_AVX(const Xmm& uv0, const Xmm& uv1)
|
||||
{
|
||||
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
|
||||
|
||||
|
@ -1099,7 +1098,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
||||
void GSDrawScanlineCodeGenerator::SampleTextureLOD_AVX()
|
||||
{
|
||||
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
|
@ -1360,13 +1359,13 @@ return;
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
WrapLOD(xmm2, xmm3);
|
||||
WrapLOD_AVX(xmm2, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
WrapLOD(xmm2);
|
||||
WrapLOD_AVX(xmm2);
|
||||
}
|
||||
|
||||
// xmm2 = uv0
|
||||
|
@ -1428,7 +1427,7 @@ return;
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 0);
|
||||
ReadTexel_AVX(4, 0);
|
||||
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -1520,7 +1519,7 @@ return;
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 0);
|
||||
ReadTexel_AVX(1, 0);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -1591,13 +1590,13 @@ return;
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
WrapLOD(xmm2, xmm3);
|
||||
WrapLOD_AVX(xmm2, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
WrapLOD(xmm2);
|
||||
WrapLOD_AVX(xmm2);
|
||||
}
|
||||
|
||||
// xmm2 = uv0
|
||||
|
@ -1659,7 +1658,7 @@ return;
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 1);
|
||||
ReadTexel_AVX(4, 1);
|
||||
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -1751,7 +1750,7 @@ return;
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 1);
|
||||
ReadTexel_AVX(1, 1);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -1774,7 +1773,7 @@ return;
|
|||
pop(ebp);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv)
|
||||
{
|
||||
// xmm5 = minuv
|
||||
// xmm6 = maxuv
|
||||
|
@ -1835,7 +1834,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD_AVX(const Xmm& uv0, const Xmm& uv1)
|
||||
{
|
||||
// xmm5 = minuv
|
||||
// xmm6 = maxuv
|
||||
|
@ -1923,7 +1922,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX_AVX()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
|
@ -2071,7 +2070,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadMask()
|
||||
void GSDrawScanlineCodeGenerator::ReadMask_AVX()
|
||||
{
|
||||
if(m_sel.fwrite)
|
||||
{
|
||||
|
@ -2084,7 +2083,7 @@ void GSDrawScanlineCodeGenerator::ReadMask()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha()
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha_AVX()
|
||||
{
|
||||
switch(m_sel.atst)
|
||||
{
|
||||
|
@ -2155,7 +2154,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX()
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -2231,7 +2230,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Fog()
|
||||
void GSDrawScanlineCodeGenerator::Fog_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite || !m_sel.fge)
|
||||
{
|
||||
|
@ -2252,7 +2251,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
mix16(xmm6, xmm1, xmm0);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame()
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame_AVX()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
|
@ -2270,10 +2269,10 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
|||
return;
|
||||
}
|
||||
|
||||
ReadPixel(xmm2, ebx);
|
||||
ReadPixel_AVX(xmm2, ebx);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha_AVX()
|
||||
{
|
||||
if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2)
|
||||
{
|
||||
|
@ -2317,7 +2316,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
alltrue();
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
void GSDrawScanlineCodeGenerator::WriteMask_AVX()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
|
@ -2363,7 +2362,7 @@ void GSDrawScanlineCodeGenerator::WriteMask()
|
|||
not(edx);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf()
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf_AVX()
|
||||
{
|
||||
if(!m_sel.zwrite)
|
||||
{
|
||||
|
@ -2381,10 +2380,10 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
WritePixel_AVX(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend()
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -2606,7 +2605,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame()
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame_AVX()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -2686,16 +2685,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
WritePixel_AVX(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel_AVX(const Xmm& dst, const Reg32& addr)
|
||||
{
|
||||
vmovq(dst, qword[addr * 2 + (size_t)m_local.gd->vm]);
|
||||
vmovhps(dst, qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2]);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
|
@ -2706,10 +2705,10 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
WritePixel_AVX(src, addr, 0, psm);
|
||||
WritePixel_AVX(src, addr, 1, psm);
|
||||
WritePixel_AVX(src, addr, 2, psm);
|
||||
WritePixel_AVX(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -2740,22 +2739,22 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel_AVX(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel_AVX(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel_AVX(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
WritePixel_AVX(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
|
@ -2763,7 +2762,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
|
||||
static const int s_offsets[] = {0, 2, 8, 10};
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm)
|
||||
void GSDrawScanlineCodeGenerator::WritePixel_AVX(const Xmm& src, const Reg32& addr, uint8 i, int psm)
|
||||
{
|
||||
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
|
||||
|
||||
|
@ -2788,7 +2787,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_AVX(int pixels, int mip_offset)
|
||||
{
|
||||
// in
|
||||
// xmm5 = addr00
|
||||
|
@ -2827,7 +2826,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
ReadTexel_AVX(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2846,19 +2845,18 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
}
|
||||
|
||||
const int r[] = {5, 6, 2, 4, 0, 1, 3, 5};
|
||||
const int t[] = {4, 1, 5, 2};
|
||||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
for(uint8 j = 0; j < 4; j++)
|
||||
{
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
ReadTexel_AVX(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_AVX(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
{
|
||||
ASSERT(i < 4);
|
||||
|
||||
|
|
|
@ -23,20 +23,20 @@
|
|||
#include "GSDrawScanlineCodeGenerator.h"
|
||||
#include "GSVertexSW.h"
|
||||
|
||||
#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
#if _M_SSE < 0x501 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
static const int _args = 16;
|
||||
static const int _top = _args + 4;
|
||||
static const int _v = _args + 8;
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Generate()
|
||||
void GSDrawScanlineCodeGenerator::Generate_SSE()
|
||||
{
|
||||
push(ebx);
|
||||
push(esi);
|
||||
push(edi);
|
||||
push(ebp);
|
||||
|
||||
Init();
|
||||
Init_SSE();
|
||||
|
||||
if(!m_sel.edge)
|
||||
{
|
||||
|
@ -58,7 +58,7 @@ L("loop");
|
|||
|
||||
bool tme = m_sel.tfx != TFX_NONE;
|
||||
|
||||
TestZ(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);
|
||||
TestZ_SSE(tme ? xmm5 : xmm2, tme ? xmm6 : xmm3);
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -74,11 +74,11 @@ L("loop");
|
|||
|
||||
if(m_sel.mmin)
|
||||
{
|
||||
SampleTextureLOD();
|
||||
SampleTextureLOD_SSE();
|
||||
}
|
||||
else
|
||||
{
|
||||
SampleTexture();
|
||||
SampleTexture_SSE();
|
||||
}
|
||||
|
||||
// ecx = steps
|
||||
|
@ -92,7 +92,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
AlphaTFX();
|
||||
AlphaTFX_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -103,7 +103,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
ReadMask();
|
||||
ReadMask_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -116,7 +116,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
TestAlpha();
|
||||
TestAlpha_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -129,7 +129,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
ColorTFX();
|
||||
ColorTFX_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -141,7 +141,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
Fog();
|
||||
Fog_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -153,7 +153,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
ReadFrame();
|
||||
ReadFrame_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -166,7 +166,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
TestDestAlpha();
|
||||
TestDestAlpha_SSE();
|
||||
|
||||
// ecx = steps
|
||||
// esi = fzbr
|
||||
|
@ -179,7 +179,7 @@ L("loop");
|
|||
// xmm6 = ga
|
||||
// xmm7 = test
|
||||
|
||||
WriteMask();
|
||||
WriteMask_SSE();
|
||||
|
||||
// ebx = fa
|
||||
// ecx = steps
|
||||
|
@ -193,7 +193,7 @@ L("loop");
|
|||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
|
||||
WriteZBuf();
|
||||
WriteZBuf_SSE();
|
||||
|
||||
// ebx = fa
|
||||
// ecx = steps
|
||||
|
@ -207,7 +207,7 @@ L("loop");
|
|||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
|
||||
AlphaBlend();
|
||||
AlphaBlend_SSE();
|
||||
|
||||
// ebx = fa
|
||||
// ecx = steps
|
||||
|
@ -219,7 +219,7 @@ L("loop");
|
|||
// xmm5 = rb
|
||||
// xmm6 = ga
|
||||
|
||||
WriteFrame();
|
||||
WriteFrame_SSE();
|
||||
|
||||
L("step");
|
||||
|
||||
|
@ -231,7 +231,7 @@ L("step");
|
|||
|
||||
jle("exit", T_NEAR);
|
||||
|
||||
Step();
|
||||
Step_SSE();
|
||||
|
||||
jmp("loop", T_NEAR);
|
||||
}
|
||||
|
@ -248,7 +248,7 @@ L("exit");
|
|||
ret(8);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Init()
|
||||
void GSDrawScanlineCodeGenerator::Init_SSE()
|
||||
{
|
||||
if(!m_sel.notest)
|
||||
{
|
||||
|
@ -457,7 +457,7 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Step()
|
||||
void GSDrawScanlineCodeGenerator::Step_SSE()
|
||||
{
|
||||
// steps -= 4;
|
||||
|
||||
|
@ -600,7 +600,7 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
||||
void GSDrawScanlineCodeGenerator::TestZ_SSE(const Xmm& temp1, const Xmm& temp2)
|
||||
{
|
||||
if(!m_sel.zb)
|
||||
{
|
||||
|
@ -648,7 +648,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
|
||||
if(m_sel.ztest)
|
||||
{
|
||||
ReadPixel(xmm1, ebp);
|
||||
ReadPixel_SSE(xmm1, ebp);
|
||||
|
||||
if(m_sel.zwrite && m_sel.zpsm < 2)
|
||||
{
|
||||
|
@ -698,7 +698,7 @@ void GSDrawScanlineCodeGenerator::TestZ(const Xmm& temp1, const Xmm& temp2)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture()
|
||||
void GSDrawScanlineCodeGenerator::SampleTexture_SSE()
|
||||
{
|
||||
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
|
@ -780,13 +780,13 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
Wrap(xmm2, xmm3);
|
||||
Wrap_SSE(xmm2, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
Wrap(xmm2);
|
||||
Wrap_SSE(xmm2);
|
||||
}
|
||||
|
||||
// xmm2 = uv0
|
||||
|
@ -853,7 +853,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 0);
|
||||
ReadTexel_SSE(4, 0);
|
||||
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -946,7 +946,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 0);
|
||||
ReadTexel_SSE(1, 0);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -957,7 +957,7 @@ void GSDrawScanlineCodeGenerator::SampleTexture()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
||||
void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv)
|
||||
{
|
||||
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
|
||||
|
||||
|
@ -1020,7 +1020,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
||||
void GSDrawScanlineCodeGenerator::Wrap_SSE(const Xmm& uv0, const Xmm& uv1)
|
||||
{
|
||||
// xmm0, xmm1, xmm4, xmm5, xmm6 = free
|
||||
|
||||
|
@ -1131,7 +1131,7 @@ void GSDrawScanlineCodeGenerator::Wrap(const Xmm& uv0, const Xmm& uv1)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
||||
void GSDrawScanlineCodeGenerator::SampleTextureLOD_SSE()
|
||||
{
|
||||
if(!m_sel.fb || m_sel.tfx == TFX_NONE)
|
||||
{
|
||||
|
@ -1140,7 +1140,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
push(ebp);
|
||||
|
||||
mov(ebp, (size_t)m_local.gd->tex);
|
||||
mov(ebp, (size_t)m_local.gd->tex);
|
||||
|
||||
if(m_sel.tlu)
|
||||
{
|
||||
|
@ -1354,13 +1354,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
WrapLOD(xmm2, xmm3);
|
||||
WrapLOD_SSE(xmm2, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
WrapLOD(xmm2);
|
||||
WrapLOD_SSE(xmm2);
|
||||
}
|
||||
|
||||
// xmm2 = uv0
|
||||
|
@ -1427,7 +1427,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 0);
|
||||
ReadTexel_SSE(4, 0);
|
||||
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -1520,7 +1520,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 0);
|
||||
ReadTexel_SSE(1, 0);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -1592,13 +1592,13 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// uv0 = Wrap(uv0);
|
||||
// uv1 = Wrap(uv1);
|
||||
|
||||
WrapLOD(xmm2, xmm3);
|
||||
WrapLOD_SSE(xmm2, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// uv0 = Wrap(uv0);
|
||||
|
||||
WrapLOD(xmm2);
|
||||
WrapLOD_SSE(xmm2);
|
||||
}
|
||||
|
||||
// xmm2 = uv0
|
||||
|
@ -1665,7 +1665,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
// c10 = addr10.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
// c11 = addr11.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(4, 1);
|
||||
ReadTexel_SSE(4, 1);
|
||||
|
||||
// xmm6 = c00
|
||||
// xmm4 = c01
|
||||
|
@ -1758,7 +1758,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
// c00 = addr00.gather32_32((const uint32/uint8*)tex[, clut]);
|
||||
|
||||
ReadTexel(1, 1);
|
||||
ReadTexel_SSE(1, 1);
|
||||
|
||||
// GSVector4i mask = GSVector4i::x00ff();
|
||||
|
||||
|
@ -1781,7 +1781,7 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
pop(ebp);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv)
|
||||
{
|
||||
// xmm5 = minuv
|
||||
// xmm6 = maxuv
|
||||
|
@ -1844,7 +1844,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
|
||||
void GSDrawScanlineCodeGenerator::WrapLOD_SSE(const Xmm& uv0, const Xmm& uv1)
|
||||
{
|
||||
// xmm5 = minuv
|
||||
// xmm6 = maxuv
|
||||
|
@ -1950,7 +1950,7 @@ void GSDrawScanlineCodeGenerator::WrapLOD(const Xmm& uv0, const Xmm& uv1)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX()
|
||||
void GSDrawScanlineCodeGenerator::AlphaTFX_SSE()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
|
@ -2098,7 +2098,7 @@ void GSDrawScanlineCodeGenerator::AlphaTFX()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadMask()
|
||||
void GSDrawScanlineCodeGenerator::ReadMask_SSE()
|
||||
{
|
||||
if(m_sel.fwrite)
|
||||
{
|
||||
|
@ -2111,7 +2111,7 @@ void GSDrawScanlineCodeGenerator::ReadMask()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha()
|
||||
void GSDrawScanlineCodeGenerator::TestAlpha_SSE()
|
||||
{
|
||||
switch(m_sel.atst)
|
||||
{
|
||||
|
@ -2186,7 +2186,7 @@ void GSDrawScanlineCodeGenerator::TestAlpha()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX()
|
||||
void GSDrawScanlineCodeGenerator::ColorTFX_SSE()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -2262,7 +2262,7 @@ void GSDrawScanlineCodeGenerator::ColorTFX()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::Fog()
|
||||
void GSDrawScanlineCodeGenerator::Fog_SSE()
|
||||
{
|
||||
if(!m_sel.fwrite || !m_sel.fge)
|
||||
{
|
||||
|
@ -2283,7 +2283,7 @@ void GSDrawScanlineCodeGenerator::Fog()
|
|||
mix16(xmm6, xmm1, xmm0);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame()
|
||||
void GSDrawScanlineCodeGenerator::ReadFrame_SSE()
|
||||
{
|
||||
if(!m_sel.fb)
|
||||
{
|
||||
|
@ -2301,10 +2301,10 @@ void GSDrawScanlineCodeGenerator::ReadFrame()
|
|||
return;
|
||||
}
|
||||
|
||||
ReadPixel(xmm2, ebx);
|
||||
ReadPixel_SSE(xmm2, ebx);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
||||
void GSDrawScanlineCodeGenerator::TestDestAlpha_SSE()
|
||||
{
|
||||
if(!m_sel.date || m_sel.fpsm != 0 && m_sel.fpsm != 2)
|
||||
{
|
||||
|
@ -2347,7 +2347,7 @@ void GSDrawScanlineCodeGenerator::TestDestAlpha()
|
|||
alltrue();
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteMask()
|
||||
void GSDrawScanlineCodeGenerator::WriteMask_SSE()
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
|
@ -2394,7 +2394,7 @@ void GSDrawScanlineCodeGenerator::WriteMask()
|
|||
not(edx);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf()
|
||||
void GSDrawScanlineCodeGenerator::WriteZBuf_SSE()
|
||||
{
|
||||
if(!m_sel.zwrite)
|
||||
{
|
||||
|
@ -2414,10 +2414,10 @@ void GSDrawScanlineCodeGenerator::WriteZBuf()
|
|||
|
||||
bool fast = m_sel.ztest ? m_sel.zpsm < 2 : m_sel.zpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
WritePixel_SSE(xmm1, ebp, dh, fast, m_sel.zpsm, 1);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend()
|
||||
void GSDrawScanlineCodeGenerator::AlphaBlend_SSE()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -2654,7 +2654,7 @@ void GSDrawScanlineCodeGenerator::AlphaBlend()
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame()
|
||||
void GSDrawScanlineCodeGenerator::WriteFrame_SSE()
|
||||
{
|
||||
if(!m_sel.fwrite)
|
||||
{
|
||||
|
@ -2739,16 +2739,16 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
|
||||
bool fast = m_sel.rfb ? m_sel.fpsm < 2 : m_sel.fpsm == 0 && m_sel.notest;
|
||||
|
||||
WritePixel(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
WritePixel_SSE(xmm5, ebx, dl, fast, m_sel.fpsm, 0);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel(const Xmm& dst, const Reg32& addr)
|
||||
void GSDrawScanlineCodeGenerator::ReadPixel_SSE(const Xmm& dst, const Reg32& addr)
|
||||
{
|
||||
movq(dst, qword[addr * 2 + (size_t)m_local.gd->vm]);
|
||||
movhps(dst, qword[addr * 2 + (size_t)m_local.gd->vm + 8 * 2]);
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& addr, const Reg8& mask, bool fast, int psm, int fz)
|
||||
{
|
||||
if(m_sel.notest)
|
||||
{
|
||||
|
@ -2759,10 +2759,10 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
}
|
||||
else
|
||||
{
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel(src, addr, 3, psm);
|
||||
WritePixel_SSE(src, addr, 0, psm);
|
||||
WritePixel_SSE(src, addr, 1, psm);
|
||||
WritePixel_SSE(src, addr, 2, psm);
|
||||
WritePixel_SSE(src, addr, 3, psm);
|
||||
}
|
||||
}
|
||||
else
|
||||
|
@ -2791,22 +2791,22 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
|
||||
test(mask, 0x03);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 0, psm);
|
||||
WritePixel_SSE(src, addr, 0, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x0c);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 1, psm);
|
||||
WritePixel_SSE(src, addr, 1, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0x30);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 2, psm);
|
||||
WritePixel_SSE(src, addr, 2, psm);
|
||||
L("@@");
|
||||
|
||||
test(mask, 0xc0);
|
||||
je("@f");
|
||||
WritePixel(src, addr, 3, psm);
|
||||
WritePixel_SSE(src, addr, 3, psm);
|
||||
L("@@");
|
||||
}
|
||||
}
|
||||
|
@ -2814,7 +2814,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
|
||||
static const int s_offsets[4] = {0, 2, 8, 10};
|
||||
|
||||
void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr, uint8 i, int psm)
|
||||
void GSDrawScanlineCodeGenerator::WritePixel_SSE(const Xmm& src, const Reg32& addr, uint8 i, int psm)
|
||||
{
|
||||
Address dst = ptr[addr * 2 + (size_t)m_local.gd->vm + s_offsets[i] * 2];
|
||||
|
||||
|
@ -2854,7 +2854,7 @@ void GSDrawScanlineCodeGenerator::WritePixel(const Xmm& src, const Reg32& addr,
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_SSE(int pixels, int mip_offset)
|
||||
{
|
||||
// in
|
||||
// xmm5 = addr00
|
||||
|
@ -2896,7 +2896,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
|
||||
for(int i = 0; i < pixels; i++)
|
||||
{
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2916,17 +2916,17 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
mov(ebx, ptr[&lod_i->u32[0]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm6, xmm5, 0);
|
||||
ReadTexel_SSE(xmm6, xmm5, 0);
|
||||
psrldq(xmm5, 4);
|
||||
ReadTexel(xmm4, xmm2, 0);
|
||||
ReadTexel_SSE(xmm4, xmm2, 0);
|
||||
psrldq(xmm2, 4);
|
||||
|
||||
mov(ebx, ptr[&lod_i->u32[1]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm1, xmm5, 0);
|
||||
ReadTexel_SSE(xmm1, xmm5, 0);
|
||||
psrldq(xmm5, 4);
|
||||
ReadTexel(xmm7, xmm2, 0);
|
||||
ReadTexel_SSE(xmm7, xmm2, 0);
|
||||
psrldq(xmm2, 4);
|
||||
|
||||
punpckldq(xmm6, xmm1);
|
||||
|
@ -2935,16 +2935,16 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
mov(ebx, ptr[&lod_i->u32[2]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm1, xmm5, 0);
|
||||
ReadTexel_SSE(xmm1, xmm5, 0);
|
||||
psrldq(xmm5, 4);
|
||||
ReadTexel(xmm7, xmm2, 0);
|
||||
ReadTexel_SSE(xmm7, xmm2, 0);
|
||||
psrldq(xmm2, 4);
|
||||
|
||||
mov(ebx, ptr[&lod_i->u32[3]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm5, xmm5, 0);
|
||||
ReadTexel(xmm2, xmm2, 0);
|
||||
ReadTexel_SSE(xmm5, xmm5, 0);
|
||||
ReadTexel_SSE(xmm2, xmm2, 0);
|
||||
|
||||
punpckldq(xmm1, xmm5);
|
||||
punpckldq(xmm7, xmm2);
|
||||
|
@ -2955,17 +2955,17 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
mov(ebx, ptr[&lod_i->u32[0]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm1, xmm0, 0);
|
||||
ReadTexel_SSE(xmm1, xmm0, 0);
|
||||
psrldq(xmm0, 4);
|
||||
ReadTexel(xmm5, xmm3, 0);
|
||||
ReadTexel_SSE(xmm5, xmm3, 0);
|
||||
psrldq(xmm3, 4);
|
||||
|
||||
mov(ebx, ptr[&lod_i->u32[1]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm2, xmm0, 0);
|
||||
ReadTexel_SSE(xmm2, xmm0, 0);
|
||||
psrldq(xmm0, 4);
|
||||
ReadTexel(xmm7, xmm3, 0);
|
||||
ReadTexel_SSE(xmm7, xmm3, 0);
|
||||
psrldq(xmm3, 4);
|
||||
|
||||
punpckldq(xmm1, xmm2);
|
||||
|
@ -2974,16 +2974,16 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
mov(ebx, ptr[&lod_i->u32[2]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm2, xmm0, 0);
|
||||
ReadTexel_SSE(xmm2, xmm0, 0);
|
||||
psrldq(xmm0, 4);
|
||||
ReadTexel(xmm7, xmm3, 0);
|
||||
ReadTexel_SSE(xmm7, xmm3, 0);
|
||||
psrldq(xmm3, 4);
|
||||
|
||||
mov(ebx, ptr[&lod_i->u32[3]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm0, xmm0, 0);
|
||||
ReadTexel(xmm3, xmm3, 0);
|
||||
ReadTexel_SSE(xmm0, xmm0, 0);
|
||||
ReadTexel_SSE(xmm3, xmm3, 0);
|
||||
|
||||
punpckldq(xmm2, xmm0);
|
||||
punpckldq(xmm7, xmm3);
|
||||
|
@ -2998,13 +2998,13 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
mov(ebx, ptr[&lod_i->u32[0]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm6, xmm5, 0);
|
||||
ReadTexel_SSE(xmm6, xmm5, 0);
|
||||
psrldq(xmm5, 4); // shuffle instead? (1 2 3 0 ~ rotation)
|
||||
|
||||
mov(ebx, ptr[&lod_i->u32[1]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm1, xmm5, 0);
|
||||
ReadTexel_SSE(xmm1, xmm5, 0);
|
||||
psrldq(xmm5, 4);
|
||||
|
||||
punpckldq(xmm6, xmm1);
|
||||
|
@ -3012,13 +3012,13 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
mov(ebx, ptr[&lod_i->u32[2]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm1, xmm5, 0);
|
||||
ReadTexel_SSE(xmm1, xmm5, 0);
|
||||
psrldq(xmm5, 4);
|
||||
|
||||
mov(ebx, ptr[&lod_i->u32[3]]);
|
||||
mov(ebx, ptr[ebp + ebx * sizeof(void*) + mip_offset]);
|
||||
|
||||
ReadTexel(xmm4, xmm5, 0);
|
||||
ReadTexel_SSE(xmm4, xmm5, 0);
|
||||
// psrldq(xmm5, 4);
|
||||
|
||||
punpckldq(xmm1, xmm4);
|
||||
|
@ -3044,7 +3044,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
{
|
||||
for(int j = 0; j < 4; j++)
|
||||
{
|
||||
ReadTexel(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
ReadTexel_SSE(Xmm(r[i * 2 + 1]), Xmm(r[i * 2 + 0]), j);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -3058,15 +3058,15 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
const Xmm& temp1 = Xmm(t[i * 2 + 0]);
|
||||
const Xmm& temp2 = Xmm(t[i * 2 + 1]);
|
||||
|
||||
ReadTexel(dst, addr, 0);
|
||||
ReadTexel_SSE(dst, addr, 0);
|
||||
psrldq(addr, 4); // shuffle instead? (1 2 3 0 ~ rotation)
|
||||
ReadTexel(temp1, addr, 0);
|
||||
ReadTexel_SSE(temp1, addr, 0);
|
||||
psrldq(addr, 4);
|
||||
punpckldq(dst, temp1);
|
||||
|
||||
ReadTexel(temp1, addr, 0);
|
||||
ReadTexel_SSE(temp1, addr, 0);
|
||||
psrldq(addr, 4);
|
||||
ReadTexel(temp2, addr, 0);
|
||||
ReadTexel_SSE(temp2, addr, 0);
|
||||
// psrldq(addr, 4);
|
||||
punpckldq(temp1, temp2);
|
||||
|
||||
|
@ -3077,7 +3077,7 @@ void GSDrawScanlineCodeGenerator::ReadTexel(int pixels, int mip_offset)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
void GSDrawScanlineCodeGenerator::ReadTexel_SSE(const Xmm& dst, const Xmm& addr, uint8 i)
|
||||
{
|
||||
const Address& src = m_sel.tlu ? ptr[edx + eax * 4] : ptr[ebx + eax * 4];
|
||||
|
||||
|
|
Loading…
Reference in New Issue