mirror of https://github.com/PCSX2/pcsx2.git
gsdx-ogl: LINUX-ONLY * Sync with trunk. (r4970-4985)
git-svn-id: http://pcsx2.googlecode.com/svn/branches/gsdx-ogl@4986 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
commit
be53385e9f
|
@ -47,7 +47,12 @@ static __fi void SetResultSize(u8 size)
|
|||
static void CDVDREAD_INT(int eCycle)
|
||||
{
|
||||
// Give it an arbitary FAST value. Good for ~5000kb/s in ULE when copying a file from CDVD to HDD
|
||||
if (EmuConfig.Speedhacks.fastCDVD) eCycle = 3000;
|
||||
// Keep long seeks out though, as games may try to push dmas while seeking. (Tales of the Abyss)
|
||||
if (EmuConfig.Speedhacks.fastCDVD) {
|
||||
if(eCycle < Cdvd_FullSeek_Cycles)
|
||||
eCycle = 3000;
|
||||
}
|
||||
|
||||
PSX_INT(IopEvt_CdvdRead, eCycle);
|
||||
}
|
||||
|
||||
|
@ -647,6 +652,11 @@ int cdvdReadSector() {
|
|||
return -1;
|
||||
}
|
||||
|
||||
//if( (HW_DMA3_CHCR & 0x01000000) == 0 ) {
|
||||
// // DMA3 problem?
|
||||
// Console.Warning( "CDVD READ - DMA3 transfer off (try again)\n" );
|
||||
//}
|
||||
|
||||
// DMAs use physical addresses (air)
|
||||
u8* mdest = iopPhysMem( HW_DMA3_MADR );
|
||||
|
||||
|
|
|
@ -80,3 +80,421 @@ void GPUDrawScanline::EndDraw(const GSRasterizerStats& stats, uint64 frame)
|
|||
{
|
||||
m_ds_map.UpdateStats(stats, frame);
|
||||
}
|
||||
|
||||
void GPUDrawScanline::PrintStats()
|
||||
{
|
||||
m_ds_map.PrintStats();
|
||||
}
|
||||
|
||||
#ifndef JIT_DRAW
|
||||
|
||||
void GPUDrawScanline::SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan)
|
||||
{
|
||||
GPUScanlineSelector sel = m_global.sel;
|
||||
|
||||
const GSVector4* shift = GPUSetupPrimCodeGenerator::m_shift;
|
||||
|
||||
if(sel.tme && !sel.twin)
|
||||
{
|
||||
if(sel.sprite)
|
||||
{
|
||||
GSVector4i t = (GSVector4i(vertices[1].t) >> 8) - GSVector4i::x00000001();
|
||||
|
||||
t = t.ps32(t);
|
||||
t = t.upl16(t);
|
||||
|
||||
m_local.twin[2].u = t.xxxx();
|
||||
m_local.twin[2].v = t.yyyy();
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: not really needed
|
||||
|
||||
m_local.twin[2].u = GSVector4i::x00ff();
|
||||
m_local.twin[2].v = GSVector4i::x00ff();
|
||||
}
|
||||
}
|
||||
|
||||
if(sel.tme || sel.iip && sel.tfx != 3)
|
||||
{
|
||||
GSVector4 dt = dscan.t;
|
||||
GSVector4 dc = dscan.c;
|
||||
|
||||
GSVector4i dtc8 = GSVector4i(dt * shift[0]).ps32(GSVector4i(dc * shift[0]));
|
||||
|
||||
if(sel.tme)
|
||||
{
|
||||
m_local.d8.st = dtc8.upl16(dtc8);
|
||||
}
|
||||
|
||||
if(sel.iip && sel.tfx != 3)
|
||||
{
|
||||
m_local.d8.c = dtc8.uph16(dtc8);
|
||||
}
|
||||
|
||||
if(sel.tme)
|
||||
{
|
||||
GSVector4 dtx = dt.xxxx();
|
||||
GSVector4 dty = dt.yyyy();
|
||||
|
||||
m_local.d.s = GSVector4i(dtx * shift[1]).ps32(GSVector4i(dtx * shift[2]));
|
||||
m_local.d.t = GSVector4i(dty * shift[1]).ps32(GSVector4i(dty * shift[2]));
|
||||
}
|
||||
|
||||
if(sel.iip && sel.tfx != 3)
|
||||
{
|
||||
GSVector4 dcx = dc.xxxx();
|
||||
GSVector4 dcy = dc.yyyy();
|
||||
GSVector4 dcz = dc.zzzz();
|
||||
|
||||
m_local.d.r = GSVector4i(dcx * shift[1]).ps32(GSVector4i(dcx * shift[2]));
|
||||
m_local.d.g = GSVector4i(dcy * shift[1]).ps32(GSVector4i(dcy * shift[2]));
|
||||
m_local.d.b = GSVector4i(dcz * shift[1]).ps32(GSVector4i(dcz * shift[2]));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDrawScanline::DrawScanline(int pixels, int left, int top, const GSVertexSW& scan)
|
||||
{
|
||||
// TODO: not tested yet, probably bogus
|
||||
|
||||
GPUScanlineSelector sel = m_global.sel;
|
||||
|
||||
GSVector4i s, t;
|
||||
GSVector4i uf, vf;
|
||||
GSVector4i rf, gf, bf;
|
||||
GSVector4i dither;
|
||||
|
||||
// Init
|
||||
|
||||
uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left;
|
||||
|
||||
int steps = pixels - 8;
|
||||
|
||||
if(sel.dtd)
|
||||
{
|
||||
dither = GSVector4i::load<false>(&GPUDrawScanlineCodeGenerator::m_dither[top & 3][left & 3]);
|
||||
}
|
||||
|
||||
if(sel.tme)
|
||||
{
|
||||
GSVector4i vt = GSVector4i(scan.t).xxzzl();
|
||||
|
||||
s = vt.xxxx().add16(m_local.d.s);
|
||||
t = vt.yyyy();
|
||||
|
||||
if(!sel.sprite)
|
||||
{
|
||||
t = t.add16(m_local.d.t);
|
||||
}
|
||||
else
|
||||
{
|
||||
if(sel.ltf)
|
||||
{
|
||||
vf = t.sll16(1).srl16(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(sel.tfx != 3)
|
||||
{
|
||||
GSVector4i vc = GSVector4i(scan.c).xxzzlh();
|
||||
|
||||
rf = vc.xxxx();
|
||||
gf = vc.yyyy();
|
||||
bf = vc.zzzz();
|
||||
|
||||
if(sel.iip)
|
||||
{
|
||||
rf = rf.add16(m_local.d.r);
|
||||
gf = gf.add16(m_local.d.g);
|
||||
bf = bf.add16(m_local.d.b);
|
||||
}
|
||||
}
|
||||
|
||||
while(1)
|
||||
{
|
||||
do
|
||||
{
|
||||
GSVector4i test = GPUDrawScanlineCodeGenerator::m_test[7 + (steps & (steps >> 31))];
|
||||
|
||||
GSVector4i fd = GSVector4i::load(fb, fb + 8);
|
||||
|
||||
GSVector4i r, g, b, a;
|
||||
|
||||
// TestMask
|
||||
|
||||
if(sel.me)
|
||||
{
|
||||
test |= fd.sra16(15);
|
||||
|
||||
if(test.alltrue()) continue;
|
||||
}
|
||||
|
||||
// SampleTexture
|
||||
|
||||
if(sel.tme)
|
||||
{
|
||||
GSVector4i u0, v0, u1, v1;
|
||||
GSVector4i addr00, addr01, addr10, addr11;
|
||||
GSVector4i c00, c01, c10, c11;
|
||||
|
||||
if(sel.ltf)
|
||||
{
|
||||
u0 = s.sub16(GSVector4i(0x00200020)); // - 0.125f
|
||||
v0 = t.sub16(GSVector4i(0x00200020)); // - 0.125f
|
||||
|
||||
uf = u0.sll16(8).srl16(1);
|
||||
vf = v0.sll16(8).srl16(1);;
|
||||
}
|
||||
else
|
||||
{
|
||||
u0 = s;
|
||||
v0 = t;
|
||||
}
|
||||
|
||||
u0 = u0.srl16(8);
|
||||
v0 = v0.srl16(8);
|
||||
|
||||
if(sel.ltf)
|
||||
{
|
||||
u1 = u0.add16(GSVector4i::x0001());
|
||||
v1 = v0.add16(GSVector4i::x0001());
|
||||
|
||||
if(sel.twin)
|
||||
{
|
||||
u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u);
|
||||
v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v);
|
||||
u1 = (u1 & m_local.twin[0].u).add16(m_local.twin[1].u);
|
||||
v1 = (v1 & m_local.twin[0].v).add16(m_local.twin[1].v);
|
||||
}
|
||||
else
|
||||
{
|
||||
u0 = u0.min_i16(m_local.twin[2].u);
|
||||
v0 = v0.min_i16(m_local.twin[2].v);
|
||||
u1 = u1.min_i16(m_local.twin[2].u);
|
||||
v1 = v1.min_i16(m_local.twin[2].v);
|
||||
}
|
||||
|
||||
addr00 = v0.sll16(8) | u0;
|
||||
addr01 = v0.sll16(8) | u1;
|
||||
addr10 = v1.sll16(8) | u0;
|
||||
addr11 = v1.sll16(8) | u1;
|
||||
|
||||
// TODO
|
||||
|
||||
if(sel.tlu)
|
||||
{
|
||||
c00 = addr00.gather16_16((const uint16*)m_global.vm, m_global.clut);
|
||||
c01 = addr01.gather16_16((const uint16*)m_global.vm, m_global.clut);
|
||||
c10 = addr10.gather16_16((const uint16*)m_global.vm, m_global.clut);
|
||||
c11 = addr11.gather16_16((const uint16*)m_global.vm, m_global.clut);
|
||||
}
|
||||
else
|
||||
{
|
||||
c00 = addr00.gather16_16((const uint16*)m_global.vm);
|
||||
c01 = addr01.gather16_16((const uint16*)m_global.vm);
|
||||
c10 = addr10.gather16_16((const uint16*)m_global.vm);
|
||||
c11 = addr11.gather16_16((const uint16*)m_global.vm);
|
||||
}
|
||||
|
||||
GSVector4i r00 = c00.sll16(11).srl16(8);
|
||||
GSVector4i r01 = c01.sll16(11).srl16(8);
|
||||
GSVector4i r10 = c10.sll16(11).srl16(8);
|
||||
GSVector4i r11 = c11.sll16(11).srl16(8);
|
||||
|
||||
r00 = r00.lerp16<0>(r01, uf);
|
||||
r10 = r10.lerp16<0>(r11, uf);
|
||||
|
||||
GSVector4i g00 = c00.sll16(6).srl16(11).sll16(3);
|
||||
GSVector4i g01 = c01.sll16(6).srl16(11).sll16(3);
|
||||
GSVector4i g10 = c10.sll16(6).srl16(11).sll16(3);
|
||||
GSVector4i g11 = c11.sll16(6).srl16(11).sll16(3);
|
||||
|
||||
g00 = g00.lerp16<0>(g01, uf);
|
||||
g10 = g10.lerp16<0>(g11, uf);
|
||||
|
||||
GSVector4i b00 = c00.sll16(1).srl16(11).sll16(3);
|
||||
GSVector4i b01 = c01.sll16(1).srl16(11).sll16(3);
|
||||
GSVector4i b10 = c10.sll16(1).srl16(11).sll16(3);
|
||||
GSVector4i b11 = c11.sll16(1).srl16(11).sll16(3);
|
||||
|
||||
b00 = b00.lerp16<0>(b01, uf);
|
||||
b10 = b10.lerp16<0>(b11, uf);
|
||||
|
||||
GSVector4i a00 = c00.sra16(15).sll16(8);
|
||||
GSVector4i a01 = c01.sra16(15).sll16(8);
|
||||
GSVector4i a10 = c10.sra16(15).sll16(8);
|
||||
GSVector4i a11 = c11.sra16(15).sll16(8);
|
||||
|
||||
a00 = a00.lerp16<0>(a01, uf);
|
||||
a10 = a10.lerp16<0>(a11, uf);
|
||||
|
||||
r = r00.lerp16<0>(r10, vf);
|
||||
g = g00.lerp16<0>(g10, vf);
|
||||
b = b00.lerp16<0>(b10, vf);
|
||||
a = a00.lerp16<0>(a10, vf);
|
||||
|
||||
test |= (r | g | b | a).eq16(GSVector4i::zero()); // mask out blank pixels (not perfect)
|
||||
|
||||
a = a.gt16(GSVector4i::zero());
|
||||
}
|
||||
else
|
||||
{
|
||||
if(sel.twin)
|
||||
{
|
||||
u0 = (u0 & m_local.twin[0].u).add16(m_local.twin[1].u);
|
||||
v0 = (v0 & m_local.twin[0].v).add16(m_local.twin[1].v);
|
||||
}
|
||||
else
|
||||
{
|
||||
u0 = u0.min_i16(m_local.twin[2].u);
|
||||
v0 = v0.min_i16(m_local.twin[2].v);
|
||||
}
|
||||
|
||||
addr00 = v0.sll16(8) | u0;
|
||||
|
||||
// TODO
|
||||
|
||||
if(sel.tlu)
|
||||
{
|
||||
c00 = addr00.gather16_16((const uint16*)m_global.vm, m_global.clut);
|
||||
}
|
||||
else
|
||||
{
|
||||
c00 = addr00.gather16_16((const uint16*)m_global.vm);
|
||||
}
|
||||
|
||||
r = (c00 << 3) & 0x00f800f8;
|
||||
g = (c00 >> 2) & 0x00f800f8;
|
||||
b = (c00 >> 7) & 0x00f800f8;
|
||||
a = c00.sra16(15);
|
||||
|
||||
test |= c00.eq16(GSVector4i::zero()); // mask out blank pixels
|
||||
}
|
||||
}
|
||||
|
||||
// ColorTFX
|
||||
|
||||
switch(sel.tfx)
|
||||
{
|
||||
case 0: // none (tfx = 0)
|
||||
case 1: // none (tfx = tge)
|
||||
r = rf.srl16(7);
|
||||
g = gf.srl16(7);
|
||||
b = bf.srl16(7);
|
||||
break;
|
||||
case 2: // modulate (tfx = tme | tge)
|
||||
r = r.modulate16<1>(rf).clamp8();
|
||||
g = g.modulate16<1>(gf).clamp8();
|
||||
b = b.modulate16<1>(bf).clamp8();
|
||||
break;
|
||||
case 3: // decal (tfx = tme)
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
// AlphaBlend
|
||||
|
||||
if(sel.abe)
|
||||
{
|
||||
GSVector4i rs = r;
|
||||
GSVector4i gs = g;
|
||||
GSVector4i bs = b;
|
||||
GSVector4i rd = (fd & 0x001f001f) << 3;
|
||||
GSVector4i gd = (fd & 0x03e003e0) >> 2;
|
||||
GSVector4i bd = (fd & 0x7c007c00) >> 7;
|
||||
|
||||
switch(sel.abr)
|
||||
{
|
||||
case 0:
|
||||
r = rd.avg8(rs);
|
||||
g = gd.avg8(gs);
|
||||
b = bd.avg8(bs);
|
||||
break;
|
||||
case 1:
|
||||
r = rd.addus8(rs);
|
||||
g = gd.addus8(gs);
|
||||
b = bd.addus8(bs);
|
||||
break;
|
||||
case 2:
|
||||
r = rd.subus8(rs);
|
||||
g = gd.subus8(gs);
|
||||
b = bd.subus8(bs);
|
||||
break;
|
||||
case 3:
|
||||
r = rd.addus8(rs.srl16(2));
|
||||
g = gd.addus8(gs.srl16(2));
|
||||
b = bd.addus8(bs.srl16(2));
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
if(sel.tme)
|
||||
{
|
||||
r = rs.blend8(rd, a);
|
||||
g = gs.blend8(gd, a);
|
||||
b = bs.blend8(bd, a);
|
||||
}
|
||||
}
|
||||
|
||||
// Dither
|
||||
|
||||
if(sel.dtd)
|
||||
{
|
||||
r = r.addus8(dither);
|
||||
g = g.addus8(dither);
|
||||
b = b.addus8(dither);
|
||||
}
|
||||
|
||||
// WriteFrame
|
||||
|
||||
GSVector4i fs = r | g | b | (sel.md ? GSVector4i(0x80008000) : sel.tme ? a : GSVector4i::zero());
|
||||
|
||||
fs = fs.blend8(fd, test);
|
||||
|
||||
GSVector4i::store(fb, fb + 8, fs);
|
||||
}
|
||||
while(0);
|
||||
|
||||
if(steps <= 0) break;
|
||||
|
||||
steps -= 8;
|
||||
|
||||
fb += 8;
|
||||
|
||||
if(sel.tme)
|
||||
{
|
||||
GSVector4i st = m_local.d8.st;
|
||||
|
||||
s = s.add16(st.xxxx());
|
||||
t = t.add16(st.yyyy());
|
||||
}
|
||||
|
||||
if(sel.tfx != 3) // != decal
|
||||
{
|
||||
if(sel.iip)
|
||||
{
|
||||
GSVector4i c = m_local.d8.c;
|
||||
|
||||
rf = rf.add16(c.xxxx());
|
||||
gf = gf.add16(c.yyyy());
|
||||
bf = bf.add16(c.zzzz());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void GPUDrawScanline::DrawEdge(int pixels, int left, int top, const GSVertexSW& scan)
|
||||
{
|
||||
ASSERT(0);
|
||||
}
|
||||
|
||||
void GPUDrawScanline::DrawRect(const GSVector4i& r, const GSVertexSW& v)
|
||||
{
|
||||
// TODO
|
||||
}
|
||||
|
||||
#endif
|
|
@ -43,5 +43,14 @@ public:
|
|||
|
||||
void BeginDraw(const void* param);
|
||||
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
||||
void PrintStats() {m_ds_map.PrintStats();}
|
||||
void PrintStats();
|
||||
|
||||
#ifndef JIT_DRAW
|
||||
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
#endif
|
||||
};
|
||||
|
|
|
@ -121,7 +121,7 @@ void GPUDrawScanlineCodeGenerator::Init()
|
|||
{
|
||||
mov(eax, dword[esp + _top]);
|
||||
|
||||
// uint16* fb = &m_local.vm[(top << (10 + m_sel.scalex)) + left];
|
||||
// uint16* fb = (uint16*)m_global.vm + (top << (10 + sel.scalex)) + left;
|
||||
|
||||
mov(edi, eax);
|
||||
shl(edi, 10 + m_sel.scalex);
|
||||
|
@ -134,7 +134,7 @@ void GPUDrawScanlineCodeGenerator::Init()
|
|||
|
||||
if(m_sel.dtd)
|
||||
{
|
||||
// dither = GSVector4i::load<false>(&s_dither[top & 3][left & 3]);
|
||||
// dither = GSVector4i::load<false>(&m_dither[top & 3][left & 3]);
|
||||
|
||||
and(eax, 3);
|
||||
shl(eax, 5);
|
||||
|
@ -741,7 +741,7 @@ void GPUDrawScanlineCodeGenerator::AlphaBlend()
|
|||
// xmm7 = test
|
||||
// xmm0, xmm2 = free
|
||||
|
||||
// GSVector4i r = (d & 0x001f001f) << 3;
|
||||
// GSVector4i r = (fd & 0x001f001f) << 3;
|
||||
|
||||
pcmpeqd(xmm0, xmm0);
|
||||
psrlw(xmm0, 11); // 0x001f
|
||||
|
|
|
@ -30,9 +30,6 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
{
|
||||
void operator = (const GPUDrawScanlineCodeGenerator&);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static const uint16 m_dither[4][16];
|
||||
|
||||
GPUScanlineSelector m_sel;
|
||||
GPUScanlineLocalData& m_local;
|
||||
|
||||
|
@ -57,4 +54,7 @@ class GPUDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
|
||||
public:
|
||||
GPUDrawScanlineCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static __aligned(const uint16, 32) m_dither[4][16];
|
||||
};
|
|
@ -28,8 +28,6 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
{
|
||||
void operator = (const GPUSetupPrimCodeGenerator&);
|
||||
|
||||
static const GSVector4 m_shift[3];
|
||||
|
||||
GPUScanlineSelector m_sel;
|
||||
GPUScanlineLocalData& m_local;
|
||||
|
||||
|
@ -37,4 +35,6 @@ class GPUSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
|
||||
public:
|
||||
GPUSetupPrimCodeGenerator(void* param, uint32 key, void* code, size_t maxsize);
|
||||
|
||||
static const GSVector4 m_shift[3];
|
||||
};
|
File diff suppressed because it is too large
Load Diff
|
@ -35,8 +35,6 @@ class GSDrawScanline : public IDrawScanline
|
|||
GSCodeGeneratorFunctionMap<GSSetupPrimCodeGenerator, uint64, SetupPrimPtr> m_sp_map;
|
||||
GSCodeGeneratorFunctionMap<GSDrawScanlineCodeGenerator, uint64, DrawScanlinePtr> m_ds_map;
|
||||
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
template<class T, bool masked>
|
||||
void DrawRectT(const int* RESTRICT row, const int* RESTRICT col, const GSVector4i& r, uint32 c, uint32 m);
|
||||
|
||||
|
@ -54,5 +52,21 @@ public:
|
|||
|
||||
void BeginDraw(const void* param);
|
||||
void EndDraw(const GSRasterizerStats& stats, uint64 frame);
|
||||
void PrintStats() {m_ds_map.PrintStats();}
|
||||
void PrintStats();
|
||||
|
||||
void DrawRect(const GSVector4i& r, const GSVertexSW& v);
|
||||
|
||||
#ifndef JIT_DRAW
|
||||
|
||||
void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan);
|
||||
void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan);
|
||||
|
||||
bool IsEdge() const {return m_global.sel.aa1;}
|
||||
bool IsRect() const {return m_global.sel.IsSolidRect();}
|
||||
|
||||
bool TestAlpha(GSVector4i& test, GSVector4i& fm, GSVector4i& zm, const GSVector4i& ga);
|
||||
void WritePixel(const GSVector4i& src, int addr, int i, uint32 psm);
|
||||
|
||||
#endif
|
||||
};
|
||||
|
|
|
@ -30,9 +30,6 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
{
|
||||
void operator = (const GSDrawScanlineCodeGenerator&);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static const GSVector4 m_log2_coef[4];
|
||||
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
|
||||
|
@ -84,4 +81,7 @@ class GSDrawScanlineCodeGenerator : public GSCodeGenerator
|
|||
|
||||
public:
|
||||
GSDrawScanlineCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
static const GSVector4i m_test[8];
|
||||
static const GSVector4 m_log2_coef[4];
|
||||
};
|
||||
|
|
|
@ -1648,8 +1648,8 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
|
||||
if(m_sel.colclamp == 0)
|
||||
{
|
||||
// c[0] &= 0x000000ff;
|
||||
// c[1] &= 0x000000ff;
|
||||
// c[0] &= 0x00ff00ff;
|
||||
// c[1] &= 0x00ff00ff;
|
||||
|
||||
vpcmpeqd(xmm15, xmm15);
|
||||
vpsrlw(xmm15, 8);
|
||||
|
|
|
@ -320,9 +320,10 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vaddps(xmm0, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
|
||||
vmovaps(ptr[&m_local.temp.z], xmm0);
|
||||
vmovaps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
vmovaps(ptr[&m_local.temp.zo], xmm2);
|
||||
vaddps(xmm0, xmm2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -343,6 +344,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
if(m_sel.edge)
|
||||
{
|
||||
// m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9);
|
||||
|
||||
vpshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
vpsrlw(xmm3, 9);
|
||||
|
@ -457,9 +460,10 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
if(m_sel.zb)
|
||||
{
|
||||
vmovaps(xmm0, ptr[&m_local.temp.z]);
|
||||
vmovaps(xmm0, ptr[&m_local.temp.zo]);
|
||||
vaddps(xmm0, ptr[&m_local.d4.z]);
|
||||
vmovaps(ptr[&m_local.temp.z], xmm0);
|
||||
vmovaps(ptr[&m_local.temp.zo], xmm0);
|
||||
vaddps(xmm0, ptr[&m_local.temp.z]);
|
||||
}
|
||||
|
||||
// f = f.add16(m_local.d4.f);
|
||||
|
@ -1184,34 +1188,35 @@ return;
|
|||
|
||||
vmovq(xmm4, ptr[&m_local.gd->t.minmax]);
|
||||
|
||||
vmovq(xmm2, ptr[&m_local.temp.uv[0].u32[0]]);
|
||||
vmovdqa(xmm2, ptr[&m_local.temp.uv[0]]);
|
||||
vmovdqa(xmm5, xmm2);
|
||||
vmovdqa(xmm3, ptr[&m_local.temp.uv[1]]);
|
||||
vmovdqa(xmm6, xmm3);
|
||||
|
||||
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]);
|
||||
vpsrad(xmm2, xmm0);
|
||||
vpsrlw(xmm1, xmm4, xmm0);
|
||||
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1);
|
||||
|
||||
vmovq(xmm3, ptr[&m_local.temp.uv[0].u32[2]]);
|
||||
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]);
|
||||
vpsrad(xmm3, xmm0);
|
||||
vpsrad(xmm5, xmm0);
|
||||
vpsrlw(xmm1, xmm4, xmm0);
|
||||
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1);
|
||||
|
||||
vmovq(xmm5, ptr[&m_local.temp.uv[1].u32[0]]);
|
||||
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]);
|
||||
vpsrad(xmm5, xmm0);
|
||||
vpsrad(xmm3, xmm0);
|
||||
vpsrlw(xmm1, xmm4, xmm0);
|
||||
vmovq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1);
|
||||
|
||||
vmovq(xmm6, ptr[&m_local.temp.uv[1].u32[2]]);
|
||||
vmovd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]);
|
||||
vpsrad(xmm6, xmm0);
|
||||
vpsrlw(xmm1, xmm4, xmm0);
|
||||
vmovq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1);
|
||||
|
||||
vpunpckldq(xmm2, xmm3);
|
||||
vpunpckldq(xmm5, xmm6);
|
||||
vpunpckhqdq(xmm3, xmm2, xmm5);
|
||||
vpunpcklqdq(xmm2, xmm5);
|
||||
vpunpckhdq(xmm5, xmm6);
|
||||
vpunpckhdq(xmm3, xmm2, xmm5);
|
||||
vpunpckldq(xmm2, xmm5);
|
||||
|
||||
vmovdqa(ptr[&m_local.temp.uv[0]], xmm2);
|
||||
vmovdqa(ptr[&m_local.temp.uv[1]], xmm3);
|
||||
|
@ -2573,8 +2578,8 @@ void GSDrawScanlineCodeGenerator::WriteFrame()
|
|||
|
||||
if(m_sel.colclamp == 0)
|
||||
{
|
||||
// c[0] &= 0x000000ff;
|
||||
// c[1] &= 0x000000ff;
|
||||
// c[0] &= 0x00ff00ff;
|
||||
// c[1] &= 0x00ff00ff;
|
||||
|
||||
vpcmpeqd(xmm7, xmm7);
|
||||
vpsrlw(xmm7, 8);
|
||||
|
|
|
@ -320,9 +320,10 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
// z = vp.zzzz() + m_local.d[skip].z;
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
addps(xmm0, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
|
||||
movaps(ptr[&m_local.temp.z], xmm0);
|
||||
movaps(xmm2, ptr[edx + offsetof(GSScanlineLocalData::skip, z)]);
|
||||
movaps(ptr[&m_local.temp.zo], xmm2);
|
||||
addps(xmm0, xmm2);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -343,6 +344,8 @@ void GSDrawScanlineCodeGenerator::Init()
|
|||
|
||||
if(m_sel.edge)
|
||||
{
|
||||
// m_local.temp.cov = GSVector4i::cast(v.t).zzzzh().wwww().srl16(9);
|
||||
|
||||
pshufhw(xmm3, xmm4, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
psrlw(xmm3, 9);
|
||||
|
@ -460,9 +463,10 @@ void GSDrawScanlineCodeGenerator::Step()
|
|||
|
||||
if(m_sel.zb)
|
||||
{
|
||||
movaps(xmm0, ptr[&m_local.temp.z]);
|
||||
movaps(xmm0, ptr[&m_local.temp.zo]);
|
||||
addps(xmm0, ptr[&m_local.d4.z]);
|
||||
movaps(ptr[&m_local.temp.z], xmm0);
|
||||
movaps(ptr[&m_local.temp.zo], xmm0);
|
||||
addps(xmm0, ptr[&m_local.temp.z]);
|
||||
}
|
||||
|
||||
// f = f.add16(m_local.d4.f);
|
||||
|
@ -1232,28 +1236,29 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
|
||||
movq(xmm4, ptr[&m_local.gd->t.minmax]);
|
||||
|
||||
movq(xmm2, ptr[&m_local.temp.uv[0].u32[0]]);
|
||||
movdqa(xmm2, ptr[&m_local.temp.uv[0]]);
|
||||
movdqa(xmm5, xmm2);
|
||||
movdqa(xmm3, ptr[&m_local.temp.uv[1]]);
|
||||
movdqa(xmm6, xmm3);
|
||||
|
||||
movd(xmm0, ptr[&m_local.temp.lod.i.u32[0]]);
|
||||
psrad(xmm2, xmm0);
|
||||
movdqa(xmm1, xmm4);
|
||||
psrlw(xmm1, xmm0);
|
||||
movq(ptr[&m_local.temp.uv_minmax[0].u32[0]], xmm1);
|
||||
|
||||
movq(xmm3, ptr[&m_local.temp.uv[0].u32[2]]);
|
||||
movd(xmm0, ptr[&m_local.temp.lod.i.u32[1]]);
|
||||
psrad(xmm3, xmm0);
|
||||
psrad(xmm5, xmm0);
|
||||
movdqa(xmm1, xmm4);
|
||||
psrlw(xmm1, xmm0);
|
||||
movq(ptr[&m_local.temp.uv_minmax[1].u32[0]], xmm1);
|
||||
|
||||
movq(xmm5, ptr[&m_local.temp.uv[1].u32[0]]);
|
||||
movd(xmm0, ptr[&m_local.temp.lod.i.u32[2]]);
|
||||
psrad(xmm5, xmm0);
|
||||
psrad(xmm3, xmm0);
|
||||
movdqa(xmm1, xmm4);
|
||||
psrlw(xmm1, xmm0);
|
||||
movq(ptr[&m_local.temp.uv_minmax[0].u32[2]], xmm1);
|
||||
|
||||
movq(xmm6, ptr[&m_local.temp.uv[1].u32[2]]);
|
||||
movd(xmm0, ptr[&m_local.temp.lod.i.u32[3]]);
|
||||
psrad(xmm6, xmm0);
|
||||
movdqa(xmm1, xmm4);
|
||||
|
@ -1261,10 +1266,10 @@ void GSDrawScanlineCodeGenerator::SampleTextureLOD()
|
|||
movq(ptr[&m_local.temp.uv_minmax[1].u32[2]], xmm1);
|
||||
|
||||
punpckldq(xmm2, xmm3);
|
||||
punpckldq(xmm5, xmm6);
|
||||
punpckhdq(xmm5, xmm6);
|
||||
movdqa(xmm3, xmm2);
|
||||
punpcklqdq(xmm2, xmm5);
|
||||
punpckhqdq(xmm3, xmm5);
|
||||
punpckldq(xmm2, xmm5);
|
||||
punpckhdq(xmm3, xmm5);
|
||||
|
||||
movdqa(ptr[&m_local.temp.uv[0]], xmm2);
|
||||
movdqa(ptr[&m_local.temp.uv[1]], xmm3);
|
||||
|
|
|
@ -180,27 +180,40 @@ void GSRasterizer::DrawLine(const GSVertexSW* v)
|
|||
|
||||
GSVector4 mask = (v[0].p > v[1].p).xxxx();
|
||||
|
||||
GSVertexSW l, dl;
|
||||
GSVertexSW scan;
|
||||
|
||||
l.p = v[0].p.blend32(v[1].p, mask);
|
||||
l.t = v[0].t.blend32(v[1].t, mask);
|
||||
l.c = v[0].c.blend32(v[1].c, mask);
|
||||
scan.p = v[0].p.blend32(v[1].p, mask);
|
||||
scan.t = v[0].t.blend32(v[1].t, mask);
|
||||
scan.c = v[0].c.blend32(v[1].c, mask);
|
||||
|
||||
GSVector4 r;
|
||||
GSVector4i p(scan.p);
|
||||
|
||||
r = v[1].p.blend32(v[0].p, mask);
|
||||
|
||||
GSVector4i p(l.p);
|
||||
|
||||
if(m_scissor.top <= p.y && p.y < m_scissor.bottom)
|
||||
if(m_scissor.top <= p.y && p.y < m_scissor.bottom && IsOneOfMyScanlines(p.y))
|
||||
{
|
||||
GSVector4 scissor = m_fscissor.xzxz();
|
||||
|
||||
GSVector4 lrf = scan.p.upl(v[1].p.blend32(v[0].p, mask)).ceil();
|
||||
GSVector4 l = lrf.max(scissor);
|
||||
GSVector4 r = lrf.min(scissor);
|
||||
GSVector4i lr = GSVector4i(l.xxyy(r));
|
||||
|
||||
int left = lr.extract32<0>();
|
||||
int right = lr.extract32<2>();
|
||||
|
||||
int pixels = right - left;
|
||||
|
||||
if(pixels > 0)
|
||||
{
|
||||
m_stats.pixels += pixels;
|
||||
|
||||
GSVertexSW dscan = dv / dv.p.xxxx();
|
||||
|
||||
l.p = l.p.upl(r).xyzw(l.p); // r.x => l.y
|
||||
scan += dscan * (l - scan.p).xxxx();
|
||||
|
||||
DrawTriangleSection(p.y, p.y + 1, l, dl, dscan, l.p.xxxx());
|
||||
m_ds->SetupPrim(v, dscan);
|
||||
|
||||
Flush(v, dscan);
|
||||
m_ds->DrawScanline(pixels, left, p.y, scan);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -277,6 +290,10 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
|
||||
int i = (y0011 == y1221).mask() & 7;
|
||||
|
||||
// if(i == 0) => y0 < y1 < y2
|
||||
// if(i == 1) => y0 == y1 < y2
|
||||
// if(i == 4) => y0 < y1 == y2
|
||||
|
||||
if(i == 7) return; // y0 == y1 == y2
|
||||
|
||||
GSVector4 tbf = y0011.xzxz(y1221).ceil();
|
||||
|
@ -338,66 +355,41 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
dscan.c = _r.ywyw(_g).hsub(_b.ywyw(_a)); // dy0 * r1 - dy1 * r0, dy0 * g1 - dy1 * g0, dy0 * b1 - dy1 * b0, dy0 * a1 - dy1 * a0
|
||||
dedge.c = _r.zxzx(_g).hsub(_b.zxzx(_a)); // dx1 * r0 - dx0 * r1, dx1 * g0 - dx0 * g1, dx1 * b0 - dx0 * b1, dx1 * a0 - dx0 * a1
|
||||
|
||||
GSVector4 x0;
|
||||
|
||||
switch(i)
|
||||
if(i & 1)
|
||||
{
|
||||
case 0: // y0 < y1 < y2
|
||||
case 4: // y0 < y1 == y2
|
||||
if(tb.y < tb.w)
|
||||
{
|
||||
edge = v[1 - j];
|
||||
|
||||
x0 = v[0].p.xxxx();
|
||||
edge.p = edge.p.insert<0, 1>(v[j].p);
|
||||
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
|
||||
|
||||
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p.xxxx(), v[1 - j].p.yyyy());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4 x0 = v[0].p.xxxx();
|
||||
|
||||
if(tb.x < tb.z)
|
||||
{
|
||||
edge = v[0];
|
||||
|
||||
GSVector4 dy = tbmax.xxxx() - edge.p.yyyy();
|
||||
|
||||
edge.p = edge.p.xxzw();
|
||||
dedge.p = ddx[j].xyzw(dedge.p);
|
||||
|
||||
edge += dedge * dy;
|
||||
|
||||
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, x0);
|
||||
DrawTriangleSection(tb.x, tb.z, edge, dedge, dscan, x0, v[0].p.yyyy());
|
||||
}
|
||||
|
||||
if(tb.y < tb.w)
|
||||
{
|
||||
edge = v[1];
|
||||
|
||||
GSVector4 dy = tbmax.zzzz() - edge.p.yyyy();
|
||||
|
||||
edge.p = (x0 + ddx[j] * dv[0].p.yyyy()).xyzw(edge.p);
|
||||
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
|
||||
|
||||
edge += dedge * dy;
|
||||
|
||||
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p.xxxx());
|
||||
DrawTriangleSection(tb.y, tb.w, edge, dedge, dscan, v[1].p.xxxx(), v[1].p.yyyy());
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
case 1: // y0 == y1 < y2
|
||||
|
||||
if(tb.y < tb.w)
|
||||
{
|
||||
edge = v[1 - j];
|
||||
|
||||
GSVector4 dy = tbmax.xxxx() - edge.p.yyyy();
|
||||
|
||||
edge.p = edge.p.insert<0, 1>(v[j].p);
|
||||
dedge.p = ddx[2 - (j << 1)].yzzw(dedge.p);
|
||||
|
||||
edge += dedge * dy;
|
||||
|
||||
DrawTriangleSection(tb.x, tb.w, edge, dedge, dscan, v[1 - j].p.xxxx());
|
||||
}
|
||||
|
||||
break;
|
||||
|
||||
default:
|
||||
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
Flush(v, dscan);
|
||||
|
@ -419,7 +411,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertices)
|
|||
}
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& x0)
|
||||
void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& x0, const GSVector4& y0)
|
||||
{
|
||||
ASSERT(top < bottom);
|
||||
ASSERT(edge.p.x <= edge.p.y);
|
||||
|
@ -432,7 +424,9 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
{
|
||||
if(IsOneOfMyScanlines(top))
|
||||
{
|
||||
GSVector4 lrf = edge.p.ceil();
|
||||
GSVertexSW scan = edge + dedge * (GSVector4(top) - y0);
|
||||
|
||||
GSVector4 lrf = scan.p.ceil();
|
||||
GSVector4 l = lrf.max(scissor);
|
||||
GSVector4 r = lrf.min(scissor);
|
||||
GSVector4i lr = GSVector4i(l.xxyy(r));
|
||||
|
@ -448,13 +442,11 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
|
||||
GSVector4 prestep = l.xxxx() - x0;
|
||||
|
||||
AddScanline(e++, pixels, left, top, edge + dscan * prestep);
|
||||
AddScanline(e++, pixels, left, top, scan + dscan * prestep);
|
||||
}
|
||||
}
|
||||
|
||||
if(++top >= bottom) break;
|
||||
|
||||
edge += dedge;
|
||||
}
|
||||
|
||||
m_edge.count += e - &m_edge.buff[m_edge.count];
|
||||
|
|
|
@ -27,6 +27,9 @@
|
|||
#include "GSThread.h"
|
||||
#include "GSAlignedClass.h"
|
||||
|
||||
//
|
||||
#define JIT_DRAW
|
||||
|
||||
__aligned(class, 32) GSRasterizerData
|
||||
{
|
||||
public:
|
||||
|
@ -62,11 +65,22 @@ public:
|
|||
virtual void EndDraw(const GSRasterizerStats& stats, uint64 frame) = 0;
|
||||
virtual void PrintStats() = 0;
|
||||
|
||||
#ifdef JIT_DRAW
|
||||
|
||||
__forceinline void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) {m_sp(vertices, dscan);}
|
||||
__forceinline void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) {m_ds(pixels, left, top, scan);}
|
||||
__forceinline void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) {m_de(pixels, left, top, scan);}
|
||||
__forceinline void DrawRect(const GSVector4i& r, const GSVertexSW& v) {(this->*m_dr)(r, v);}
|
||||
|
||||
#else
|
||||
|
||||
virtual void SetupPrim(const GSVertexSW* vertices, const GSVertexSW& dscan) = 0;
|
||||
virtual void DrawScanline(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
||||
virtual void DrawEdge(int pixels, int left, int top, const GSVertexSW& scan) = 0;
|
||||
virtual void DrawRect(const GSVector4i& r, const GSVertexSW& v) = 0;
|
||||
|
||||
#endif
|
||||
|
||||
__forceinline bool IsEdge() const {return m_de != NULL;}
|
||||
__forceinline bool IsRect() const {return m_dr != NULL;}
|
||||
};
|
||||
|
@ -102,7 +116,7 @@ protected:
|
|||
void DrawTriangle(const GSVertexSW* v);
|
||||
void DrawSprite(const GSVertexSW* v);
|
||||
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& x0);
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& x0, const GSVector4& y0);
|
||||
|
||||
void DrawEdge(const GSVertexSW& v0, const GSVertexSW& v1, const GSVertexSW& dv, int orientation, int side);
|
||||
|
||||
|
|
|
@ -552,7 +552,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
{
|
||||
v[i].t *= w;
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
v[i].t = (t * w).xyzw(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -562,10 +564,13 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
for(int i = 0, j = m_count; i < j; i += 2)
|
||||
{
|
||||
GSVector4 w = v[i + 1].t.zzzz().rcpnr();
|
||||
GSVector4 t0 = v[i + 0].t;
|
||||
GSVector4 t1 = v[i + 1].t;
|
||||
|
||||
v[i + 0].t *= w;
|
||||
v[i + 1].t *= w;
|
||||
GSVector4 w = t1.zzzz().rcpnr();
|
||||
|
||||
v[i + 0].t = (t0 * w).xyzw(t0);
|
||||
v[i + 1].t = (t1 * w).xyzw(t1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -582,7 +587,9 @@ bool GSRendererSW::GetScanlineGlobalData(GSScanlineGlobalData& gd)
|
|||
|
||||
for(int i = 0, j = m_count; i < j; i++)
|
||||
{
|
||||
v[i].t -= half;
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
v[i].t = (t - half).xyzw(t);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -146,8 +146,9 @@ __aligned(struct, 32) GSScanlineLocalData // per prim variables, each thread has
|
|||
|
||||
struct
|
||||
{
|
||||
GSVector4i z, f;
|
||||
GSVector4i s, t, q;
|
||||
GSVector4 z, zo;
|
||||
GSVector4i f;
|
||||
GSVector4 s, t, q;
|
||||
GSVector4i rb, ga;
|
||||
GSVector4i zs, zd;
|
||||
GSVector4i uf, vf;
|
||||
|
|
|
@ -28,8 +28,6 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
{
|
||||
void operator = (const GSSetupPrimCodeGenerator&);
|
||||
|
||||
static const GSVector4 m_shift[5];
|
||||
|
||||
GSScanlineSelector m_sel;
|
||||
GSScanlineLocalData& m_local;
|
||||
|
||||
|
@ -43,4 +41,6 @@ class GSSetupPrimCodeGenerator : public GSCodeGenerator
|
|||
|
||||
public:
|
||||
GSSetupPrimCodeGenerator(void* param, uint64 key, void* code, size_t maxsize);
|
||||
|
||||
static const GSVector4 m_shift[5];
|
||||
};
|
||||
|
|
|
@ -156,7 +156,16 @@ bool GSTextureSW::Save(const string& fn, bool dds)
|
|||
|
||||
for(int h = m_size.y; h > 0; h--, data -= m_pitch)
|
||||
{
|
||||
fwrite(data, 1, m_size.x << 2, fp); // TODO: swap red-blue?
|
||||
for(int i = 0; i < m_size.x; i++)
|
||||
{
|
||||
uint32 c = ((uint32*)data)[i];
|
||||
|
||||
c = (c & 0xff00ff00) | ((c & 0x00ff0000) >> 16) | ((c & 0x000000ff) << 16);
|
||||
|
||||
fwrite(&c, 1, sizeof(c), fp);
|
||||
}
|
||||
|
||||
// fwrite(data, 1, m_size.x << 2, fp); // TODO: swap red-blue?
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
|
|
|
@ -151,7 +151,7 @@ public:
|
|||
this->m = m;
|
||||
}
|
||||
|
||||
__forceinline explicit GSVector4i(const GSVector4& v);
|
||||
__forceinline explicit GSVector4i(const GSVector4& v, bool truncate = true);
|
||||
|
||||
__forceinline void operator = (const GSVector4i& v)
|
||||
{
|
||||
|
@ -796,41 +796,81 @@ public:
|
|||
return GSVector4i(_mm_srai_epi16(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sra16(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_sra_epi16(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sra32(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_srai_epi32(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sra32(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_sra_epi32(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sll16(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_slli_epi16(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sll16(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_sll_epi16(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sll32(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_slli_epi32(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sll32(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_sll_epi32(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sll64(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_slli_epi64(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i sll64(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_sll_epi64(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i srl16(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_srli_epi16(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i srl16(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_srl_epi16(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i srl32(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_srli_epi32(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i srl32(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_srl_epi32(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i srl64(int i) const
|
||||
{
|
||||
return GSVector4i(_mm_srli_epi64(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i srl64(__m128i i) const
|
||||
{
|
||||
return GSVector4i(_mm_srl_epi64(m, i));
|
||||
}
|
||||
|
||||
__forceinline GSVector4i add8(const GSVector4i& v) const
|
||||
{
|
||||
return GSVector4i(_mm_add_epi8(m, v.m));
|
||||
|
@ -3109,9 +3149,9 @@ public:
|
|||
VECTOR4_SHUFFLE_1(w, 3)
|
||||
};
|
||||
|
||||
__forceinline GSVector4i::GSVector4i(const GSVector4& v)
|
||||
__forceinline GSVector4i::GSVector4i(const GSVector4& v, bool truncate)
|
||||
{
|
||||
m = _mm_cvttps_epi32(v);
|
||||
m = truncate ? _mm_cvttps_epi32(v) : _mm_cvtps_epi32(v);
|
||||
}
|
||||
|
||||
__forceinline GSVector4::GSVector4(const GSVector4i& v)
|
||||
|
@ -3158,7 +3198,7 @@ public:
|
|||
|
||||
__forceinline GSVector8i() {}
|
||||
|
||||
__forceinline explicit GSVector8i(const GSVector8& v);
|
||||
__forceinline explicit GSVector8i(const GSVector8& v, bool truncate = true);
|
||||
|
||||
static GSVector8i cast(const GSVector8& v);
|
||||
|
||||
|
@ -4158,9 +4198,9 @@ public:
|
|||
|
||||
#if _M_SSE >= 0x500
|
||||
|
||||
__forceinline GSVector8i::GSVector8i(const GSVector8& v)
|
||||
__forceinline GSVector8i::GSVector8i(const GSVector8& v, bool truncate)
|
||||
{
|
||||
m = _mm256_cvttps_epi32(v);
|
||||
m = truncate ? _mm256_cvttps_epi32(v) : _mm256_cvtps_epi32(v);
|
||||
}
|
||||
|
||||
__forceinline GSVector8::GSVector8(const GSVector8i& v)
|
||||
|
@ -4180,10 +4220,10 @@ __forceinline GSVector8 GSVector8::cast(const GSVector8i& v)
|
|||
|
||||
#else
|
||||
|
||||
__forceinline GSVector8i::GSVector8i(const GSVector8& v)
|
||||
__forceinline GSVector8i::GSVector8i(const GSVector8& v, bool truncate)
|
||||
{
|
||||
m[0] = _mm_cvttps_epi32(v.m[0]);
|
||||
m[1] = _mm_cvttps_epi32(v.m[1]);
|
||||
m[0] = truncate ? _mm_cvttps_epi32(v.m[0]) : _mm_cvtps_epi32(v.m[0]);
|
||||
m[1] = truncate ? _mm_cvttps_epi32(v.m[1]) : _mm_cvtps_epi32(v.m[1]);
|
||||
}
|
||||
|
||||
__forceinline GSVector8::GSVector8(const GSVector8i& v)
|
||||
|
|
Loading…
Reference in New Issue