GSdx: Simplified the vertex formats and the related code; everything now works with the basic GSVertex until it is uploaded to the vertex buffer.

git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5074 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
gabest11 2012-01-19 04:53:36 +00:00
parent d763bf6616
commit 9ec7f14fa8
39 changed files with 503 additions and 2886 deletions

View File

@ -1103,13 +1103,13 @@ __aligned(struct, 32) GIFPath
enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2};
void SetTag(const void* mem)
__forceinline void SetTag(const void* mem)
{
GSVector4i v = GSVector4i::load<false>(mem);
GSVector4i::store<true>(&tag, v);
reg = 0;
regs = v.uph8(v >> 4) & 0x0f0f0f0f;
nreg = tag.NREG ? tag.NREG : 16;
regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg);
nloop = tag.NLOOP;
type = TYPE_UNKNOWN;
if(regs.u32[0] == 0x00040102 && nreg == 3) type = TYPE_STQRGBAXYZF2;

View File

@ -720,6 +720,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
}
// Convenience upload path: obtains a writable pointer from IAMapVertexBuffer for
// `count` vertices of `stride` bytes each, copies the caller's data into it and
// then unmaps the buffer.
// If the mapping fails nothing is copied and the failure is not reported to the caller.
void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{
void* ptr = NULL;
if(IAMapVertexBuffer(&ptr, stride, count))
{
// copies count * stride bytes from `vertex` to the mapped destination
// NOTE(review): storent is presumably a non-temporal (streaming) store - confirm its alignment requirements
GSVector4i::storent(ptr, vertex, count * stride);
IAUnmapVertexBuffer();
}
}
bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
{
ASSERT(m_vertex.count == 0);
@ -747,7 +759,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
hr = m_dev->CreateBuffer(&bd, NULL, &m_vb);
if(FAILED(hr)) return;
if(FAILED(hr)) return false;
}
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
@ -761,17 +773,24 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
D3D11_MAPPED_SUBRESOURCE m;
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m)))
{
GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
m_ctx->Unmap(m_vb, 0);
return false;
}
*vertex = (uint8*)m.pData + m_vertex.start * stride;
m_vertex.count = count;
m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride);
return true;
}
// Counterpart of IAMapVertexBuffer: unmaps subresource 0 of m_vb and then binds m_vb
// through the IASetVertexBuffer(ID3D11Buffer*, size_t) overload.
void GSDevice11::IAUnmapVertexBuffer()
{
m_ctx->Unmap(m_vb, 0);
// the stride is taken from m_vertex.stride because this function has no stride argument of its own
IASetVertexBuffer(m_vb, m_vertex.stride);
}
void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)

View File

@ -170,6 +170,8 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count);
void IAUnmapVertexBuffer();
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(ID3D11Buffer* ib);
@ -190,7 +192,6 @@ public:
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
void OMSetRenderTargets(const GSVector2i& rtsize, ID3D11UnorderedAccessView** uav, int count, const GSVector4i* scissor = NULL);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel);
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);

View File

@ -911,6 +911,18 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
}
// Convenience upload path: obtains a writable pointer from IAMapVertexBuffer for
// `count` vertices of `stride` bytes each, copies the caller's data into it and
// then unmaps the buffer.
// If the mapping fails nothing is copied and the failure is not reported to the caller.
void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
{
void* ptr = NULL;
if(IAMapVertexBuffer(&ptr, stride, count))
{
// copies count * stride bytes from `vertex` to the mapped destination
// NOTE(review): storent is presumably a non-temporal (streaming) store - confirm its alignment requirements
GSVector4i::storent(ptr, vertex, count * stride);
IAUnmapVertexBuffer();
}
}
bool GSDevice9::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
{
ASSERT(m_vertex.count == 0);
@ -930,7 +942,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun
hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
if(FAILED(hr)) return;
if(FAILED(hr)) return false;
}
uint32 flags = D3DLOCK_NOOVERWRITE;
@ -942,19 +954,22 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun
flags = D3DLOCK_DISCARD;
}
void* ptr = NULL;
if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
if(FAILED(m_vb->Lock(m_vertex.start * stride, count * stride, vertex, flags)))
{
GSVector4i::storent(ptr, vertex, count * stride);
m_vb->Unlock();
return false;
}
m_vertex.count = count;
m_vertex.stride = stride;
IASetVertexBuffer(m_vb, stride);
return true;
}
// Counterpart of IAMapVertexBuffer: unlocks m_vb and then binds it through the
// IASetVertexBuffer(IDirect3DVertexBuffer9*, size_t) overload.
void GSDevice9::IAUnmapVertexBuffer()
{
m_vb->Unlock();
// the stride is taken from m_vertex.stride because this function has no stride argument of its own
IASetVertexBuffer(m_vb, m_vertex.stride);
}
void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)

View File

@ -196,6 +196,8 @@ public:
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count);
void IAUnmapVertexBuffer();
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
void IASetIndexBuffer(const void* index, size_t count);
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
@ -216,7 +218,6 @@ public:
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
void SetupGS(GSSelector sel) {}
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);

View File

@ -279,7 +279,6 @@ public:
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;}
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
virtual void SetupGS(GSSelector sel) = 0;
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;

View File

@ -22,9 +22,8 @@
#include "stdafx.h"
#include "GSRenderer.h"
GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
: GSState(vt, vertex_stride)
, m_dev(NULL)
GSRenderer::GSRenderer()
: m_dev(NULL)
, m_shader(0)
{
m_GStitleInfoBuffer[0] = 0;

View File

@ -58,7 +58,7 @@ public:
int s_saven;
public:
GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
GSRenderer();
virtual ~GSRenderer();
virtual bool CreateWnd(const string& title, int w, int h);

View File

@ -23,12 +23,10 @@
#include "GSRendererCS.h"
GSRendererCS::GSRendererCS()
: GSRenderer(new GSVertexTraceDX11(this), sizeof(GSVertexHW11))
: GSRenderer()
{
m_nativeres = true;
InitConvertVertex(GSRendererCS);
memset(m_vm_valid, 0, sizeof(m_vm_valid));
}
@ -233,26 +231,6 @@ GSTexture* GSRendererCS::GetOutput(int i)
return NULL;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
GSVector4i v0 = ((GSVector4i*)s)[0];
GSVector4i v1 = ((GSVector4i*)s)[1];
if(tme && fst)
{
// TODO: modify VertexTrace to read uv from v1.u16[0], v1.u16[1], then this step is not needed
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
}
((GSVector4i*)d)[0] = v0;
((GSVector4i*)d)[1] = v1;
}
void GSRendererCS::Draw()
{
GSDrawingEnvironment& env = m_env;
@ -260,7 +238,7 @@ void GSRendererCS::Draw()
GSVector2i rtsize(2048, 2048);
GSVector4i scissor = GSVector4i(context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
GSVector4i r = bbox.rintersect(scissor);
uint32 fm = context->FRAME.FBMSK;
@ -286,7 +264,7 @@ void GSRendererCS::Draw()
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
// TODO: unswizzle pages of r to a texture, check m_vm_valid, bit not set cpu->gpu, set gpu->gpu
@ -317,7 +295,7 @@ void GSRendererCS::Draw()
D3D11_PRIMITIVE_TOPOLOGY topology;
switch(m_vt->m_primclass)
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
@ -333,7 +311,7 @@ void GSRendererCS::Draw()
__assume(0);
}
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertexHW11), m_vertex.next);
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next);
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
dev->IASetPrimitiveTopology(topology);
@ -407,7 +385,7 @@ void GSRendererCS::Draw()
GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt->m_primclass;
gs_sel.prim = m_vt.m_primclass;
CComPtr<ID3D11GeometryShader> gs;

View File

@ -120,9 +120,6 @@ class GSRendererCS : public GSRenderer
bool GetOffsetBuffer(OffsetBuffer** fzbo);
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
bool CreateDevice(GSDevice* dev);
void VSync(int field);
GSTexture* GetOutput(int i);

View File

@ -23,10 +23,9 @@
#include "GSRendererDX.h"
#include "GSDeviceDX.h"
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
: GSRendererHW(vt, vertex_stride, tc)
GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter)
: GSRendererHW(tc)
, m_pixelcenter(pixelcenter)
, m_topology(-1)
{
m_logz = !!theApp.GetConfig("logz", 0);
m_fba = !!theApp.GetConfig("fba", 1);
@ -61,7 +60,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =
@ -111,7 +110,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
if(!IsOpaque())
{
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
om_bsel.a = context->ALPHA.A;
om_bsel.b = context->ALPHA.B;
@ -154,11 +153,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
{
if(context->ZBUF.PSM == PSM_PSMZ24)
{
if(m_vt->m_max.p.z > 0xffffff)
if(m_vt.m_max.p.z > 0xffffff)
{
ASSERT(m_vt->m_min.p.z > 0xffffff);
ASSERT(m_vt.m_min.p.z > 0xffffff);
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
if (m_vt->m_min.p.z > 0xffffff)
if (m_vt.m_min.p.z > 0xffffff)
{
vs_sel.bppz = 1;
om_dssel.ztst = ZTST_ALWAYS;
@ -167,11 +166,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
}
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
{
if(m_vt->m_max.p.z > 0xffff)
if(m_vt.m_max.p.z > 0xffff)
{
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
// Fixme : Same as above, I guess.
if (m_vt->m_min.p.z > 0xffff)
if (m_vt.m_min.p.z > 0xffff)
{
vs_sel.bppz = 2;
om_dssel.ztst = ZTST_ALWAYS;
@ -213,7 +212,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
GSDeviceDX::GSSelector gs_sel;
gs_sel.iip = PRIM->IIP;
gs_sel.prim = m_vt->m_primclass;
gs_sel.prim = m_vt.m_primclass;
// ps
@ -281,7 +280,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
ps_sel.aem = env.TEXA.AEM;
ps_sel.tfx = context->TEX0.TFX;
ps_sel.tcc = context->TEX0.TCC;
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
ps_sel.rt = tex->m_target;
int w = tex->m_texture->GetWidth();
@ -331,7 +330,6 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
uint8 afix = context->ALPHA.FIX;
dev->SetupOM(om_dssel, om_bsel, afix);
dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
dev->SetupVS(vs_sel, &vs_cb);
dev->SetupGS(gs_sel);
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);

View File

@ -32,13 +32,11 @@ class GSRendererDX : public GSRendererHW
bool UserHacks_AlphaHack;
protected:
int m_topology;
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
virtual void UpdateFBA(GSTexture* rt) {}
public:
GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
virtual ~GSRendererDX();
};

View File

@ -25,9 +25,8 @@
#include "resource.h"
GSRendererDX11::GSRendererDX11()
: GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
: GSRendererDX(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
{
InitConvertVertex(GSRendererDX11);
}
bool GSRendererDX11::CreateDevice(GSDevice* dev)
@ -38,43 +37,40 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
return true;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
GSVector4i v0 = ((GSVector4i*)s)[0];
GSVector4i v1 = ((GSVector4i*)s)[1];
if(tme && fst)
{
// TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
}
((GSVector4i*)d)[0] = v0;
((GSVector4i*)d)[1] = v1;
}
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(m_vt->m_primclass)
GSDevice11* dev = (GSDevice11*)m_dev;
void* ptr = NULL;
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
{
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
dev->IAUnmapVertexBuffer();
}
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
D3D11_PRIMITIVE_TOPOLOGY t;
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
break;
case GS_TRIANGLE_CLASS:
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
break;
default:
__assume(0);
}
dev->IASetPrimitiveTopology(t);
__super::DrawPrims(rt, ds, tex);
}

View File

@ -28,15 +28,8 @@
class GSRendererDX11 : public GSRendererDX
{
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
public:
GSRendererDX11();
virtual ~GSRendererDX11() {}

View File

@ -25,9 +25,8 @@
#include "resource.h"
GSRendererDX9::GSRendererDX9()
: GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
: GSRendererDX(new GSTextureCache9(this))
{
InitConvertVertex(GSRendererDX9);
}
bool GSRendererDX9::CreateDevice(GSDevice* dev)
@ -57,56 +56,21 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
return true;
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
if(tme && !fst)
{
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
}
else
{
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
}
GSVector4 t = GSVector4::zero();
if(tme)
{
if(fst)
{
t = GSVector4(GSVector4i::load(s->UV).upl16());
}
else
{
t = GSVector4::loadl(&s->ST);
}
}
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
d->p = p;
d->t = t;
}
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
{
switch(m_vt->m_primclass)
D3DPRIMITIVETYPE topology;
switch(m_vt.m_primclass)
{
case GS_POINT_CLASS:
m_topology = D3DPT_POINTLIST;
topology = D3DPT_POINTLIST;
break;
case GS_LINE_CLASS:
m_topology = D3DPT_LINELIST;
topology = D3DPT_LINELIST;
if(PRIM->IIP == 0)
{
@ -122,7 +86,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
case GS_TRIANGLE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
topology = D3DPT_TRIANGLELIST;
if(PRIM->IIP == 0)
{
@ -138,7 +102,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
case GS_SPRITE_CLASS:
m_topology = D3DPT_TRIANGLELIST;
topology = D3DPT_TRIANGLELIST;
// each sprite converted to quad needs twice the space
@ -154,29 +118,35 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
size_t count = m_vertex.next;
int i = (int)count * 2 - 4;
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
GSVertex* s = &m_vertex.buff[count - 2];
GSVertex* q = &m_vertex.buff[count * 2 - 4];
uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
{
GSVertexHW9 v0 = s[0];
GSVertexHW9 v1 = s[1];
GSVertex v0 = s[0];
GSVertex v1 = s[1];
v0.p = v0.p.xyzw(v1.p); // z, q
v0.t = v0.t.xyzw(v1.t); // c, f
v0.RGBAQ = v1.RGBAQ;
v0.XYZ.Z = v1.XYZ.Z;
v0.FOG = v1.FOG;
q[0] = v0;
q[3] = v1;
// swap x, s
// swap x, s, u
GSVector4 p = v0.p.insert<0, 0>(v1.p);
GSVector4 t = v0.t.insert<0, 0>(v1.t);
v1.p = v1.p.insert<0, 0>(v0.p);
v1.t = v1.t.insert<0, 0>(v0.t);
v0.p = p;
v0.t = t;
uint16 x = v0.XYZ.X;
v0.XYZ.X = v1.XYZ.X;
v1.XYZ.X = x;
float s = v0.ST.S;
v0.ST.S = v1.ST.S;
v1.ST.S = s;
uint16 u = v0.U;
v0.U = v1.U;
v1.U = u;
q[1] = v0;
q[2] = v1;
@ -199,7 +169,56 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
__assume(0);
}
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
GSDevice9* dev = (GSDevice9*)m_dev;
(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
void* ptr = NULL;
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
{
GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;
for(int i = 0; i < m_vertex.next; i++, s++, d++)
{
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
if(PRIM->TME && !PRIM->FST)
{
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
}
else
{
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
}
GSVector4 t = GSVector4::zero();
if(PRIM->TME)
{
if(PRIM->FST)
{
t = GSVector4(GSVector4i::load(s->UV).upl16());
}
else
{
t = GSVector4::loadl(&s->ST);
}
}
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
d->p = p;
d->t = t;
}
dev->IAUnmapVertexBuffer();
}
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
dev->IASetPrimitiveTopology(topology);
__super::DrawPrims(rt, ds, tex);
}
@ -220,7 +239,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
GSVector4 o = GSVector4(-1.0f, 1.0f);
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
GSVector4 dst = src * 2.0f + o.xxxx();
GSVertexPT1 vertices[] =

View File

@ -34,17 +34,9 @@ protected:
Direct3DBlendState9 bs;
} m_fba;
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
void UpdateFBA(GSTexture* rt);
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
public:
GSRendererDX9();
virtual ~GSRendererDX9() {}

View File

@ -22,9 +22,8 @@
#include "stdafx.h"
#include "GSRendererHW.h"
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
: GSRenderer(vt, vertex_stride)
, m_tc(tc)
GSRendererHW::GSRendererHW(GSTextureCache* tc)
: m_tc(tc)
, m_width(1024)
, m_height(1024)
, m_skip(0)
@ -211,7 +210,7 @@ void GSRendererHW::Draw()
GSVector4i r;
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
@ -298,7 +297,7 @@ void GSRendererHW::Draw()
//
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
if(fm != 0xffffffff)
{
@ -410,14 +409,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
if(lines == 0)
{
if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
if(m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
{
lines = m_vertex.next / 2;
}
}
else
{
if(m_vt->m_primclass == GS_POINT_CLASS)
if(m_vt.m_primclass == GS_POINT_CLASS)
{
if(m_vertex.next >= 16 * 512)
{
@ -428,14 +427,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
int ox = m_context->XYOFFSET.OFX;
int oy = m_context->XYOFFSET.OFY;
const uint8* RESTRICT v = m_vertex.buff;
const GSVertex* RESTRICT v = m_vertex.buff;
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
for(int i = (int)m_vertex.next; i >= 0; i--, v++)
{
int x = (GetPosX(v) - ox) >> 4;
int y = (GetPosY(v) - oy) >> 4;
int x = (v->XYZ.X - ox) >> 4;
int y = (v->XYZ.Y - oy) >> 4;
video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0];
}
return false;
@ -445,7 +444,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
lines = 0;
}
}
else if(m_vt->m_primclass == GS_LINE_CLASS)
else if(m_vt.m_primclass == GS_LINE_CLASS)
{
if(m_vertex.next == lines * 2)
{
@ -458,10 +457,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
size_t stride = m_vertex.stride;
memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);
m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2];
m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1];
m_index.buff[0] = 0;
m_index.buff[1] = 1;
@ -473,7 +470,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
m_vertex.head = m_vertex.tail = m_vertex.next = 4;
m_index.tail = 6;
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
}
else
{
@ -505,11 +502,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
{
// missing red channel fix (looks alright in pcsx2 r5000+)
uint8* RESTRICT v = m_vertex.buff;
GSVertex* RESTRICT v = m_vertex.buff;
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
for(int i = (int)m_vertex.next; i >= 0; i--, v++)
{
uint32 c = GetColor(v);
uint32 c = v->RGBAQ.u32[0];
uint32 r = (c >> 0) & 0xff;
uint32 g = (c >> 8) & 0xff;
@ -517,11 +514,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
if(r == 0 && g != 0 && b != 0)
{
SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1);
}
}
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt.m_primclass);
return true;
}
@ -701,7 +698,7 @@ bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTex
}
else if(PRIM->TME)
{
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0))
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_max.p.z == 0))
{
m_dev->ClearDepth(ds, 0);
}
@ -757,7 +754,7 @@ bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureC
}
else if(PRIM->TME)
{
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0))
{
m_dev->ClearDepth(ds, 0);
}
@ -783,7 +780,7 @@ bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureC
}
else if(PRIM->TME)
{
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0))
{
m_dev->ClearDepth(ds, 0);
}
@ -797,7 +794,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac
uint32 FBP = m_context->FRAME.Block();
uint32 FPSM = m_context->FRAME.PSM;
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z)
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_eq.z)
{
m_context->TEST.ZTST = ZTST_ALWAYS;
//m_dev->ClearDepth(ds, 0);
@ -809,7 +806,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
if(m_vt->m_primclass == GS_POINT_CLASS && !PRIM->TME)
if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME)
{
uint32 FBP = m_context->FRAME.Block();
uint32 FBW = m_context->FRAME.FBW;
@ -818,16 +815,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
{
if(m_vertex.next == 16)
{
uint8* RESTRICT v = m_vertex.buff;
GSVertex* RESTRICT v = m_vertex.buff;
for(int i = 0; i < 16; i++, v += m_vertex.stride)
for(int i = 0; i < 16; i++, v++)
{
uint32 c = GetColor(v);
uint32 c = v->RGBAQ.u32[0];
uint32 a = c >> 24;
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
SetColor(v, c);
v->RGBAQ.u32[0] = c;
m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW);
}
@ -838,16 +835,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
}
else if(m_vertex.next == 256)
{
uint8* RESTRICT v = m_vertex.buff;
GSVertex* RESTRICT v = m_vertex.buff;
for(int i = 0; i < 256; i++, v += m_vertex.stride)
for(int i = 0; i < 256; i++, v++)
{
uint32 c = GetColor(v);
uint32 c = v->RGBAQ.u32[0];
uint32 a = c >> 24;
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
SetColor(v, c);
v->RGBAQ.u32[0] = c;
m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW);
}

View File

@ -126,11 +126,6 @@ private:
} m_hacks;
virtual int GetPosX(const void* vertex) const = 0;
virtual int GetPosY(const void* vertex) const = 0;
virtual uint32 GetColor(const void* vertex) const = 0;
virtual void SetColor(void* vertex, uint32 c) const = 0;
#pragma endregion
protected:
@ -139,7 +134,7 @@ protected:
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
public:
GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
GSRendererHW(GSTextureCache* tc);
virtual ~GSRendererHW();
void SetGameCRC(uint32 crc, int options);

View File

@ -32,11 +32,6 @@ class GSRendererNull : public GSRenderer
};
protected:
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index)
{
}
void Draw()
{
}
@ -48,8 +43,7 @@ protected:
public:
GSRendererNull()
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
: GSRenderer()
{
InitConvertVertex(GSRendererNull);
}
};

View File

@ -29,11 +29,8 @@ static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
GSRendererSW::GSRendererSW(int threads)
: GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW))
, m_fzb(NULL)
: m_fzb(NULL)
{
InitConvertVertex(GSRendererSW);
m_nativeres = true; // ignore ini, sw is always native
m_tc = new GSTextureCacheSW(this);
@ -233,73 +230,67 @@ GSTexture* GSRendererSW::GetOutput(int i)
return m_texture[i];
}
template<uint32 prim, uint32 tme, uint32 fst>
void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
{
GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
ASSERT(d->_pad.u32[0] != 0x12345678);
uint32 z = s->XYZ.Z;
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4 p, t, c;
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(tme)
{
if(fst)
{
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
}
}
c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
d->p = p;
d->c = c;
d->t = t;
#ifdef _DEBUG
d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once
#endif
if(prim == GS_SPRITE)
{
d->t.u32[3] = z;
}
}
void GSRendererSW::Draw()
{
SharedData* sd = new SharedData(this);
shared_ptr<GSRasterizerData> data(sd);
sd->primclass = m_vt->m_primclass;
sd->primclass = m_vt.m_primclass;
sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
sd->vertex = (GSVertexSW*)sd->buff;
sd->vertex_count = m_vertex.next;
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
sd->index_count = m_index.tail;
memcpy(sd->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
for(size_t i = 0; i < m_index.tail; i++)
{
ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678);
// TODO: template, JIT
GSVertex* RESTRICT s = m_vertex.buff;
GSVertexSW* RESTRICT d = sd->vertex;
GSVector4i o = (GSVector4i)m_context->XYOFFSET;
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
for(size_t i = 0; i < m_vertex.next; i++, s++, d++)
{
uint32 z = s->XYZ.Z;
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o;
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
GSVector4 p, t, c;
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
if(PRIM->TME)
{
if(PRIM->FST)
{
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
}
else
{
t = GSVector4(s->ST.S, s->ST.T) * tsize;
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
}
}
c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
d->p = p;
d->c = c;
d->t = t;
if(sd->primclass == GS_SPRITE_CLASS)
{
d->t.u32[3] = z;
}
}
}
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
@ -314,7 +305,7 @@ void GSRendererSW::Draw()
GSScanlineGlobalData& gd = sd->global;
GSVector4i scissor = GSVector4i(context->scissor.in);
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
@ -713,7 +704,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
const GS_PRIM_CLASS primclass = m_vt->m_primclass;
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
gd.vm = m_mem.m_vm8;
@ -790,7 +781,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff)
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
{
gd.sel.iip = PRIM->IIP;
}
@ -800,7 +791,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.sel.tfx = context->TEX0.TFX;
gd.sel.tcc = context->TEX0.TCC;
gd.sel.fst = PRIM->FST;
gd.sel.ltf = m_vt->IsLinear();
gd.sel.ltf = m_vt.IsLinear();
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
{
@ -814,7 +805,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.sel.wms = context->CLAMP.WMS;
gd.sel.wmt = context->CLAMP.WMT;
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128)))
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
{
// modulate does not do anything when vertex color is 0x80
@ -833,7 +824,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
if(!t->Update(r)) {ASSERT(0); return false;}
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0)
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
{
uint64 frame = m_perfmon.GetFrame();
@ -850,7 +841,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
gd.tex[0] = t->m_buff;
gd.sel.tw = t->m_tw - 3;
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0)
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
{
// TEX1.MMIN
// 000 p
@ -860,13 +851,13 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
// 100 l round
// 101 l tri
if(m_vt->m_lod.x > 0)
if(m_vt.m_lod.x > 0)
{
gd.sel.ltf = context->TEX1.MMIN >> 2;
}
else
{
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0
}
gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
@ -875,9 +866,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16);
int k = context->TEX1.K << 12;
if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL)
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
{
k = (int)m_vt->m_lod.x << 16; // set lod to max level
k = (int)m_vt.m_lod.x << 16; // set lod to max level
gd.sel.lcm = 1; // lod is constant
gd.sel.mmin = 1; // tri-linear is meaningless
@ -891,7 +882,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
if(gd.sel.fst)
{
ASSERT(gd.sel.lcm == 1);
ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
gd.sel.lcm = 1;
}
@ -920,8 +911,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GIFRegTEX0 MIP_TEX0 = context->TEX0;
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
GSVector4 tmin = m_vt->m_min.t;
GSVector4 tmax = m_vt->m_max.t;
GSVector4 tmin = m_vt.m_min.t;
GSVector4 tmax = m_vt.m_max.t;
static int s_counter = 0;
@ -971,8 +962,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
MIP_CLAMP.MAXU >>= 1;
MIP_CLAMP.MAXV >>= 1;
m_vt->m_min.t *= 0.5f;
m_vt->m_max.t *= 0.5f;
m_vt.m_min.t *= 0.5f;
m_vt.m_max.t *= 0.5f;
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
@ -1014,8 +1005,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
s_counter++;
m_vt->m_min.t = tmin;
m_vt->m_max.t = tmax;
m_vt.m_min.t = tmin;
m_vt.m_max.t = tmax;
}
else
{
@ -1025,7 +1016,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
GSVertexSW* RESTRICT v = data->vertex;
if(m_vt->m_eq.q)
if(m_vt.m_eq.q)
{
gd.sel.fst = 1;
@ -1202,7 +1193,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000;
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
}
gd.fm = GSVector4i(fm);

View File

@ -71,9 +71,6 @@ protected:
bool GetScanlineGlobalData(SharedData* data);
template<uint32 prim, uint32 tme, uint32 fst>
void ConvertVertex(size_t dst_index, size_t src_index);
public:
GSRendererSW(int threads);
virtual ~GSRendererSW();

View File

@ -26,7 +26,7 @@
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
//#define Offset_UV // Fixes / breaks various titles
GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
GSState::GSState()
: m_version(6)
, m_mt(false)
, m_irq(NULL)
@ -35,7 +35,7 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
, m_crc(0)
, m_options(0)
, m_frameskip(0)
, m_vt(vt)
, m_vt(this)
{
m_nativeres = !!theApp.GetConfig("nativeres", 0);
@ -44,15 +44,8 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
memset(&m_vertex, 0, sizeof(m_vertex));
memset(&m_index, 0, sizeof(m_index));
ASSERT(vertex_stride >= sizeof(GSVertex));
m_vertex.stride = vertex_stride;
m_vertex.tmp = (uint8*)_aligned_malloc(m_vertex.stride * 2, 32);
GrowVertexBuffer();
memset(m_cv, 0, sizeof(m_cv));
m_sssize = 0;
m_sssize += sizeof(m_version);
@ -114,8 +107,6 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
// Releases the aligned allocations held in m_vertex and m_index.
GSState::~GSState()
{
_aligned_free(m_vertex.tmp);
// buff pointers are checked first because they may still be NULL
if(m_vertex.buff) _aligned_free(m_vertex.buff);
if(m_index.buff) _aligned_free(m_index.buff);
}
@ -211,7 +202,7 @@ void GSState::Reset()
{
printf("GS reset\n");
memset(m_mem.m_vm8, 0, m_mem.m_vmsize);
// FIXME: memset(m_mem.m_vm8, 0, m_mem.m_vmsize); // bios logo not shown cut in half after reset, missing graphics in GoW after first FMV
memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path));
memset(&m_v, 0, sizeof(m_v));
@ -1274,40 +1265,8 @@ void GSState::FlushPrim()
{
if(m_index.tail > 0)
{
if(0)
{
uint8* buff = new uint8[m_vertex.next];
GSVertex buff[2];
memset(buff, 0, m_vertex.next);
for(size_t i = 0; i < m_index.tail; i++)
{
ASSERT(m_index.buff[i] < m_vertex.next);
buff[m_index.buff[i]] = 1;
}
size_t count = 0;
for(size_t i = 0; i < m_vertex.next; i++)
{
if(buff[i] == 0)
{
count++;
}
}
if(count > 0)
{
printf("unref %lld %d/%d\n", m_perfmon.GetFrame(), count, m_vertex.next);
}
delete [] buff;
}
uint8* buff = m_vertex.tmp;
size_t stride = m_vertex.stride;
size_t head = m_vertex.head;
size_t tail = m_vertex.tail;
size_t next = m_vertex.next;
@ -1326,11 +1285,11 @@ void GSState::FlushPrim()
case GS_TRIANGLELIST:
case GS_TRIANGLESTRIP:
unused = tail - head;
memcpy(buff, &m_vertex.buff[stride * head], stride * unused);
memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused);
break;
case GS_TRIANGLEFAN:
memcpy(buff, &m_vertex.buff[stride * head], stride); unused = 1;
if(tail - 1 > head) {memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride); unused = 2;}
buff[0] = m_vertex.buff[head]; unused = 1;
if(tail - 1 > head) {buff[1] = m_vertex.buff[tail - 1]; unused = 2;}
break;
case GS_INVALID:
break;
@ -1345,7 +1304,7 @@ void GSState::FlushPrim()
{
// FIXME: berserk fpsm = 27 (8H)
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
Draw();
@ -1359,7 +1318,7 @@ void GSState::FlushPrim()
if(unused > 0)
{
memcpy(m_vertex.buff, buff, stride * unused);
memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused);
m_vertex.tail = unused;
m_vertex.next = next > head ? next - head : 0;
@ -2182,20 +2141,18 @@ void GSState::UpdateVertexKick()
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3];
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
m_cvf = m_cv[prim][PRIM->TME][PRIM->FST];
}
void GSState::GrowVertexBuffer()
{
int maxcount = std::max<int>(m_vertex.maxcount * 3 / 2, 10000);
uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16);
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 16);
uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3
if(m_vertex.buff != NULL)
{
memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail);
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
_aligned_free(m_vertex.buff);
}
@ -2227,17 +2184,13 @@ __forceinline void GSState::VertexKick(uint32 skip)
GSVector4i v0(m_v.m[0]);
GSVector4i v1(m_v.m[1]);
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[m_vertex.stride * tail];
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail];
tailptr[0] = v0;
tailptr[1] = v1;
m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.sub16(GSVector4i::load(m_ofxy)).sra16(4)).upl16()); // zw not sign extended, only useful for eq tests
#ifdef _DEBUG
memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex));
#endif
m_vertex.tail = ++tail;
m_vertex.xy_tail = ++xy_tail;
@ -2353,8 +2306,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
uint32* RESTRICT buff = &m_index.buff[m_index.tail];
size_t src_index = head;
switch(prim)
{
case GS_POINTLIST:
@ -2362,7 +2313,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
m_vertex.head = head + 1;
m_vertex.next = head + 1;
m_index.tail += 1;
(this->*m_cvf)(head, head);
break;
case GS_LINELIST:
buff[0] = head + 0;
@ -2370,18 +2320,20 @@ __forceinline void GSState::VertexKick(uint32 skip)
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
break;
case GS_LINESTRIP:
if(next < head) {head = next; m_vertex.tail = next + 2;}
if(next < head)
{
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
head = next;
m_vertex.tail = next + 2;
}
buff[0] = head + 0;
buff[1] = head + 1;
m_vertex.head = head + 1;
m_vertex.next = head + 2;
m_index.tail += 2;
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
/*if(head + 1 >= next)*/ (this->*m_cvf)(head + 1, src_index + 1); // this is always a new vertex
break;
case GS_TRIANGLELIST:
buff[0] = head + 0;
@ -2390,21 +2342,22 @@ __forceinline void GSState::VertexKick(uint32 skip)
m_vertex.head = head + 3;
m_vertex.next = head + 3;
m_index.tail += 3;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
(this->*m_cvf)(head + 2, head + 2);
break;
case GS_TRIANGLESTRIP:
if(next < head) {head = next; m_vertex.tail = next + 3;}
if(next < head)
{
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
m_vertex.buff[next + 2] = m_vertex.buff[head + 2];
head = next;
m_vertex.tail = next + 3;
}
buff[0] = head + 0;
buff[1] = head + 1;
buff[2] = head + 2;
m_vertex.head = head + 1;
m_vertex.next = head + 3;
m_index.tail += 3;
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
/*if(src_index + 2 >= next)*/ (this->*m_cvf)(head + 2, src_index + 2); // this is always a new vertex
break;
case GS_TRIANGLEFAN:
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
@ -2413,9 +2366,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
buff[2] = tail - 1;
m_vertex.next = tail;
m_index.tail += 3;
if(head >= next) (this->*m_cvf)(head, head);
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
/*if(tail - 1 >= next)*/ (this->*m_cvf)(tail - 1, tail - 1); // this is always a new vertex
break;
case GS_SPRITE:
buff[0] = head + 0;
@ -2423,10 +2373,8 @@ __forceinline void GSState::VertexKick(uint32 skip)
m_vertex.head = head + 2;
m_vertex.next = head + 2;
m_index.tail += 2;
(this->*m_cvf)(head + 0, head + 0);
(this->*m_cvf)(head + 1, head + 1);
break;
case GS_INVALID:
case GS_INVALID:
m_vertex.tail = head;
break;
default:
@ -2492,7 +2440,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
if(wms + wmt < 6)
{
GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t);
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
if(linear)
{
@ -2570,7 +2518,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
void GSState::GetAlphaMinMax()
{
if(m_vt->m_alpha.valid)
if(m_vt.m_alpha.valid)
{
return;
}
@ -2578,7 +2526,7 @@ void GSState::GetAlphaMinMax()
const GSDrawingEnvironment& env = m_env;
const GSDrawingContext* context = m_context;
GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww();
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
if(PRIM->TME && context->TEX0.TCC)
{
@ -2630,9 +2578,9 @@ void GSState::GetAlphaMinMax()
}
}
m_vt->m_alpha.min = a.x;
m_vt->m_alpha.max = a.z;
m_vt->m_alpha.valid = true;
m_vt.m_alpha.min = a.x;
m_vt.m_alpha.max = a.z;
m_vt.m_alpha.valid = true;
}
bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
@ -2649,8 +2597,8 @@ bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
{
GetAlphaMinMax();
int amin = m_vt->m_alpha.min;
int amax = m_vt->m_alpha.max;
int amin = m_vt.m_alpha.min;
int amax = m_vt.m_alpha.max;
int aref = context->TEST.AREF;
@ -2734,8 +2682,8 @@ bool GSState::IsOpaque()
{
GetAlphaMinMax();
amin = m_vt->m_alpha.min;
amax = m_vt->m_alpha.max;
amin = m_vt.m_alpha.min;
amax = m_vt.m_alpha.max;
}
else if(context->ALPHA.C == 1)
{

View File

@ -143,12 +143,10 @@ protected:
struct
{
uint8* buff;
size_t stride;
GSVertex* buff;
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
GSVector4 xy[4];
size_t xy_tail;
uint8* tmp;
} m_vertex;
struct
@ -157,26 +155,6 @@ protected:
size_t tail;
} m_index;
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
#define InitConvertVertex2(T, P) \
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
#define InitConvertVertex(T) \
InitConvertVertex2(T, GS_POINTLIST) \
InitConvertVertex2(T, GS_LINELIST) \
InitConvertVertex2(T, GS_LINESTRIP) \
InitConvertVertex2(T, GS_TRIANGLELIST) \
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
InitConvertVertex2(T, GS_TRIANGLEFAN) \
InitConvertVertex2(T, GS_SPRITE) \
InitConvertVertex2(T, GS_INVALID) \
void UpdateContext();
void UpdateScissor();
@ -189,7 +167,7 @@ protected:
// following functions need m_vt to be initialized
GSVertexTrace* m_vt;
GSVertexTrace m_vt;
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
void GetAlphaMinMax();
@ -213,7 +191,7 @@ public:
bool m_nativeres;
public:
GSState(GSVertexTrace* vt, size_t vertex_stride);
GSState();
virtual ~GSState();
void ResetHandlers();

View File

@ -82,13 +82,6 @@ bool GSDevice11::CreateTextureFX()
return true;
}
// Sets up the input assembler for one draw: forwards vertex_count vertices of
// sizeof(GSVertexHW11) stride, index_count 32-bit indices, and the primitive topology
// (prim is cast to D3D11_PRIMITIVE_TOPOLOGY) to the corresponding IASet* helpers.
void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{
IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
}
void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{
hash_map<uint32, GSVertexShader11 >::const_iterator i = m_vs.find(sel);
@ -118,6 +111,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
};

View File

@ -61,13 +61,6 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
return t;
}
// Sets up the input assembler for one draw: forwards vertex_count vertices of
// sizeof(GSVertexHW9) stride, index_count 32-bit indices, and the primitive type
// (prim is cast to D3DPRIMITIVETYPE) to the corresponding IASet* helpers.
void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
{
IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
IASetIndexBuffer(index, index_count);
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
}
void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
{
hash_map<uint32, GSVertexShader9>::const_iterator i = m_vs.find(sel);

View File

@ -22,6 +22,46 @@
#include "stdafx.h"
#include "GSVector.h"
// Byte masks of growing length: entry i has 0xff in the first i + 1 bytes (taken in
// constructor-argument order, lowest byte of each 32-bit value first) and zero in the rest.
// The table is 0-based and has 16 entries, so valid indices are 0..15; entry 15 is all ones.
const GSVector4i GSVector4i::m_xff[16] =
{
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
};
// Low-nibble masks of growing length: entry i has 0x0f in the first i + 1 bytes (taken in
// constructor-argument order, lowest byte of each 32-bit value first) and zero in the rest.
// The table is 0-based and has 16 entries, so valid indices are 0..15.
const GSVector4i GSVector4i::m_x0f[16] =
{
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
};
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
const GSVector4 GSVector4::m_half(0.5f);

View File

@ -79,6 +79,9 @@ class GSVector4;
__aligned(class, 16) GSVector4i
{
static const GSVector4i m_xff[16];
static const GSVector4i m_x0f[16];
public:
union
{
@ -2343,6 +2346,9 @@ public:
__forceinline static GSVector4i xfff8(const GSVector4i& v) {return xffffffff(v).sll16( 3);}
__forceinline static GSVector4i xfffc(const GSVector4i& v) {return xffffffff(v).sll16( 2);}
__forceinline static GSVector4i xfffe(const GSVector4i& v) {return xffffffff(v).sll16( 1);}
// Returns a mask with 0xff in the first n bytes (n = 1..16) and zero elsewhere; n <= 0 yields an all-zero mask.
// m_xff is 0-based (entry i covers i + 1 bytes), so the byte count is shifted down by one here; indexing the
// table with n directly selected one byte too many and read past the end of the 16-entry table for n == 16.
__forceinline static GSVector4i xff(int n) {return n > 0 ? m_xff[n - 1] : zero();}
// Returns a mask with 0x0f in the first n bytes (n = 1..16) and zero elsewhere; n <= 0 yields an all-zero mask.
// m_x0f is 0-based (entry i covers i + 1 bytes), so the byte count is shifted down by one here; indexing the
// table with n directly selected one byte too many and read past the end of the 16-entry table for n == 16
// (GIFPath::SetTag passes a register count in the range 1..16).
__forceinline static GSVector4i x0f(int n) {return n > 0 ? m_x0f[n - 1] : zero();}
};
__aligned(class, 16) GSVector4

View File

@ -37,7 +37,8 @@ __aligned(struct, 32) GSVertex
GIFRegST ST;
GIFRegRGBAQ RGBAQ;
GIFRegXYZ XYZ;
uint32 UV, FOG;
union {uint32 UV; struct {uint16 U, V;};};
uint32 FOG;
};
__m128i m[2];

View File

@ -37,45 +37,4 @@ __aligned(struct, 32) GSVertexHW9
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
};
// Vertex layout used by the D3D11 hardware renderer. Each anonymous union overlays a
// renderer-friendly view (t, r/g/b/a/q, p, fog/ta bytes) on top of the matching raw GIF
// register (ST, RGBAQ, XYZ, FOG), so the same storage can be accessed either way.
__aligned(union, 32) GSVertexHW11
{
struct
{
// texture coordinates, aliased with the ST register
union
{
struct {float x, y;} t;
GIFRegST ST;
};
// vertex color bytes and q, aliased with the RGBAQ register
union
{
union {struct {uint8 r, g, b, a; float q;}; uint32 c0;};
GIFRegRGBAQ RGBAQ;
};
// 16-bit x/y and 32-bit z, aliased with the XYZ register
union
{
struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p;
GIFRegXYZ XYZ;
};
// ta0/ta1/fog bytes (after one padding word), aliased with the FOG register
union
{
struct {uint32 _pad; union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;};};
GIFRegFOG FOG;
};
};
// Copies the whole vertex as two GSVector4i values instead of member by member.
GSVertexHW11& operator = (GSVertexHW11& v)
{
GSVector4i* RESTRICT src = (GSVector4i*)&v;
GSVector4i* RESTRICT dst = (GSVector4i*)this;
dst[0] = src[0];
dst[1] = src[1];
return *this;
}
};
#pragma pack(pop)

View File

@ -29,10 +29,38 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
// Stores the owning state and fills the m_fmm dispatch table with one FindMinMax template
// instantiation for every combination of primitive class, IIP, TME, FST and the color flag.
// The table is indexed as m_fmm[color][fst][tme][iip][primclass].
GSVertexTrace::GSVertexTrace(const GSState* state)
: m_state(state)
{
// The helper macros below expand level by level: InitUpdate(P) covers every IIP/TME pair,
// InitUpdate2 covers every FST/COLOR pair, and InitUpdate3 assigns a single table entry.
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
#define InitUpdate2(P, IIP, TME) \
InitUpdate3(P, IIP, TME, 0, 0) \
InitUpdate3(P, IIP, TME, 0, 1) \
InitUpdate3(P, IIP, TME, 1, 0) \
InitUpdate3(P, IIP, TME, 1, 1) \
#define InitUpdate(P) \
InitUpdate2(P, 0, 0) \
InitUpdate2(P, 0, 1) \
InitUpdate2(P, 1, 0) \
InitUpdate2(P, 1, 1) \
InitUpdate(GS_POINT_CLASS);
InitUpdate(GS_LINE_CLASS);
InitUpdate(GS_TRIANGLE_CLASS);
InitUpdate(GS_SPRITE_CLASS);
}
void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_primclass = primclass;
uint32 iip = m_state->PRIM->IIP;
uint32 tme = m_state->PRIM->TME;
uint32 fst = m_state->PRIM->FST;
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, count);
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
m_alpha.valid = false;
@ -82,90 +110,108 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, G
}
}
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
{
m_primclass = primclass;
uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4);
if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC))
{
hash |= 1 << 5;
}
return hash;
}
GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceSW", NULL)
{
}
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
GSVertexTrace::Update(vertex, index, count, primclass);
}
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW9", NULL)
{
}
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
const GSDrawingContext* context = m_state->m_context;
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
bool sprite = primclass == GS_SPRITE_CLASS;
m_min.p = (m_min.p - o) * s;
m_max.p = (m_max.p - o) * s;
int n = 1;
if(m_state->PRIM->TME)
switch(primclass)
{
if(m_state->PRIM->FST)
{
s = GSVector4(1 << (16 - 4), 1).xxyy();
}
else
{
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
}
m_min.t *= s;
m_max.t *= s;
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
GSVertexTrace::Update(vertex, index, count, primclass);
}
GSVector4 pmin = s_minmax.xxxx();
GSVector4 pmax = s_minmax.yyyy();
GSVector4 tmin = s_minmax.xxxx();
GSVector4 tmax = s_minmax.yyyy();
GSVector4i cmin = GSVector4i::xffffffff();
GSVector4i cmax = GSVector4i::zero();
GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
: GSVertexTrace(state)
, m_map("VertexTraceHW11", NULL)
{
}
const GSVertex* RESTRICT v = (GSVertex*)vertex;
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
{
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
for(int i = 0; i < count; i += n)
{
GSVector4 q;
GSVector4i f;
const GSDrawingContext* context = m_state->m_context;
if(sprite)
{
if(tme && !fst)
{
q = GSVector4::load<true>(&v[index[i + 1]]).wwww();
}
f = GSVector4i(v[index[i + 1]].m[1]).wwww();
}
for(int j = 0; j < n; j++)
{
GSVector4i c(v[index[i + j]].m[0]);
if(color && (iip || j == n - 1)) // TODO: unroll, to avoid j == n - 1
{
cmin = cmin.min_u8(c);
cmax = cmax.max_u8(c);
}
if(tme)
{
if(!fst)
{
GSVector4 stq = GSVector4::cast(c);
GSVector4 q2 = !sprite ? stq.wwww() : q;
stq = (stq.xyww() * q2.rcpnr()).xyww(q2);
tmin = tmin.min(stq);
tmax = tmax.max(stq);
}
else
{
GSVector4i uv(v[index[i + j]].m[1]);
GSVector4 st = GSVector4(uv.uph16()).xyxy();
tmin = tmin.min(st);
tmax = tmax.max(st);
}
}
GSVector4i xyzf(v[index[i + j]].m[1]);
GSVector4i xy = xyzf.upl16();
GSVector4i z = xyzf.yyyy().srl32(1);
GSVector4 p = GSVector4(xy.upl64(z.upl32(!sprite ? xyzf.wwww() : f)));
pmin = pmin.min(p);
pmax = pmax.max(p);
}
}
GSVector4 o(context->XYOFFSET);
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);
m_min.p = (m_min.p - o) * s;
m_max.p = (m_max.p - o) * s;
m_min.p = (pmin - o) * s;
m_max.p = (pmax - o) * s;
if(m_state->PRIM->TME)
if(tme)
{
if(m_state->PRIM->FST)
if(fst)
{
s = GSVector4(1 << (16 - 4), 1).xxyy();
}
@ -174,10 +220,13 @@ void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int coun
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
}
m_min.t *= s;
m_max.t *= s;
m_min.t = tmin * s;
m_max.t = tmax * s;
}
GSVertexTrace::Update(vertex, index, count, primclass);
if(color)
{
m_min.c = cmin.zzzz().u8to32();
m_max.c = cmax.zzzz().u8to32();
}
}

View File

@ -38,12 +38,15 @@ public:
protected:
const GSState* m_state;
uint32 Hash(GS_PRIM_CLASS primclass);
typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
static const GSVector4 s_minmax;
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
FindMinMaxPtr m_fmm[2][2][2][2][4];
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
void FindMinMax(const void* vertex, const uint32* index, int count);
public:
GS_PRIM_CLASS m_primclass;
@ -69,55 +72,7 @@ public:
GSVertexTrace(const GSState* state);
virtual ~GSVertexTrace() {}
virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
bool IsLinear() const {return m_filter.linear;}
};
// GSVertexTrace variant for the software renderer: Update() is routed through m_map,
// a function map keyed by uint32 whose entries are VertexTracePtr functions produced
// by the nested code generator CG.
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
{
// code generator used by m_map to emit the function for a given key
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceSW(const GSState* state);
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// GSVertexTrace variant for the D3D9 renderer: Update() is routed through m_map,
// a function map keyed by uint32 whose entries are VertexTracePtr functions produced
// by the nested code generator CG.
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
{
// code generator used by m_map to emit the function for a given key
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceDX9(const GSState* state);
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};
// GSVertexTrace variant for the D3D11 renderer: Update() is routed through m_map,
// a function map keyed by uint32 whose entries are VertexTracePtr functions produced
// by the nested code generator CG.
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
{
// code generator used by m_map to emit the function for a given key
class CG : public GSCodeGenerator
{
public:
CG(const void* param, uint32 key, void* code, size_t maxsize);
};
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
public:
GSVertexTraceDX11(const GSState* state);
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
};

View File

@ -1,496 +0,0 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSVertexTrace.h"
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
using namespace Xbyak;
// Emits (via Xbyak) a min/max scan over an array of GSVertexSW, specialized for the
// options packed into key: bits 0-1 primitive class, bit 2 iip, bit 3 tme, bit 4 fst,
// bit 5 color. The generated code walks the vertices sequentially through rdx, counts
// down ecx, and stores the results through r8 (min) and r9 (max) using
// GSVertexTrace::Vertex offsets.
// Register use in the generated code: xmm2/xmm3 = color min/max, xmm4/xmm5 = position
// min/max, xmm6/xmm7 = texture coordinate min/max, xmm0/xmm1 = scratch.
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of vertices consumed per loop iteration (vertices per primitive)
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// xmm6/xmm7 are saved on the stack here and restored before ret
// NOTE(review): presumably because they are callee-saved in the Win64 ABI - confirm
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
mov(rax, (size_t)&s_minmax);
vbroadcastss(xmm4, ptr[rax + 0]);
vbroadcastss(xmm5, ptr[rax + 4]);
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
vmovaps(xmm2, xmm4);
vmovaps(xmm3, xmm5);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// sprites: broadcast element 2 of the second vertex's t into xmm1, used as the divisor for both vertices
vmovaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
for(int j = 0; j < n; j++)
{
// without iip only the last vertex of each primitive contributes to the color range
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
vminps(xmm2, xmm0);
vmaxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
if(!fst)
{
if(primclass != GS_SPRITE_CLASS)
{
// non-sprites: each vertex is divided by its own element 2
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
}
// divide t by the broadcast divisor, then keep the two low quotients and take the upper two lanes from the divisor
vdivps(xmm0, xmm1);
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
}
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
}
add(rdx, n * sizeof(GSVertexSW));
sub(ecx, n);
jg("loop");
// }
if(color)
{
// color results are truncated to integers and shifted right by 7 before being stored
vcvttps2dq(xmm2, xmm2);
vpsrld(xmm2, 7);
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
vcvttps2dq(xmm3, xmm3);
vpsrld(xmm3, 7);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
// restore the saved registers and release the stack space
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
add(rsp, 8 + 2 * 16);
ret();
}
// Emits the AVX (x64) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over an array of GSVertexHW9.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining vertex count, rdx = current
// vertex, r8 = min output (GSVertexTrace::Vertex), r9 = max output.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of vertices consumed per loop iteration; sprites take 6 here.
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
case GS_SPRITE_CLASS:
n = 6;
break;
}
// Prologue: reserve stack space and save xmm6/xmm7, which serve as the texture
// accumulators below. The extra 8 bytes presumably re-align rsp to 16 for the
// aligned stores - TODO confirm against the calling convention.
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
// The seeds are broadcast from the dwords at offsets 0 and 4 of s_minmax.
mov(rax, (size_t)&s_minmax);
vbroadcastss(xmm4, ptr[rax + 0]);
vbroadcastss(xmm5, ptr[rax + 4]);
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
vpcmpeqd(xmm2, xmm2);
vpxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Note: the count is only tested at the bottom of the loop, so one primitive
// is always processed.
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites: the divisor is p.w of the sixth vertex, broadcast once per primitive.
vmovaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
if(tme && !fst && primclass != GS_SPRITE_CLASS)
{
// Non-sprites: the divisor is this vertex's own p.w.
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
}
// A single 16-byte load at offset t feeds both the colour and the texture paths.
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
}
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// min.c = min.c.min_u8(v[i + j].c);
// max.c = max.c.max_u8(v[i + j].c);
// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.
vpminub(xmm2, xmm0);
vpmaxub(xmm3, xmm0);
}
if(tme)
{
vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
if(!fst)
{
// t /= p.wwww();
vdivps(xmm0, xmm1);
}
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
}
// Advance to the next primitive and loop while vertices remain.
add(rdx, n * sizeof(GSVertexHW9));
sub(ecx, n);
jg("loop");
// }
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm2, xmm2);
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm3, xmm3);
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// m_min.p = pmin;
// m_max.p = pmax;
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// m_min.t = tmin.xyww(pmin);
// m_max.t = tmax.xyww(pmax);
// Lanes 0-1 come from the texture accumulator, lanes 2-3 are the position accumulator's w.
vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
// Epilogue: restore xmm6/xmm7 and release the stack space.
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
add(rsp, 8 + 2 * 16);
ret();
}
// Emits the AVX (x64) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over an array of GSVertexHW11.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining vertex count, rdx = current
// vertex, r8 = min output (GSVertexTrace::Vertex), r9 = max output.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of vertices consumed per loop iteration (one primitive).
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// Prologue: reserve stack space and save xmm6/xmm7, which serve as the texture
// accumulators below. The extra 8 bytes presumably re-align rsp to 16 for the
// aligned stores - TODO confirm against the calling convention.
sub(rsp, 8 + 2 * 16);
vmovdqa(ptr[rsp + 0], xmm6);
vmovdqa(ptr[rsp + 16], xmm7);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
// The seeds are broadcast from the dwords at offsets 0 and 4 of s_minmax.
mov(rax, (size_t)&s_minmax);
vbroadcastss(xmm4, ptr[rax + 0]);
vbroadcastss(xmm5, ptr[rax + 4]);
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
vpcmpeqd(xmm2, xmm2);
vpxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Note: the count is only tested at the bottom of the loop, so one primitive
// is always processed.
align(16);
L("loop");
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// The first 16 bytes of the vertex feed both the colour and the texture paths.
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]);
}
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.
vpminub(xmm2, xmm0);
vpmaxub(xmm3, xmm0);
}
if(tme)
{
if(!fst)
{
// Keep an unshuffled copy so lane 3 can be broadcast as the divisor.
vmovaps(xmm1, xmm0);
}
vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
if(!fst)
{
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
vdivps(xmm0, xmm1);
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
}
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
// Position comes from the second 16 bytes of the vertex, stored as integers:
// lanes 0-1 = its first two 16-bit words zero-extended,
// lanes 2-3 = its first two dwords shifted right by one; all then converted to float.
vmovdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]);
vpmovzxwd(xmm1, xmm0);
vpsrld(xmm0, 1);
vpunpcklqdq(xmm1, xmm0);
vcvtdq2ps(xmm1, xmm1);
vminps(xmm4, xmm1);
vmaxps(xmm5, xmm1);
}
// Advance to the next primitive and loop while vertices remain.
add(rdx, n * sizeof(GSVertexHW11));
sub(ecx, n);
jg("loop");
// }
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm2, xmm2);
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm3, xmm3);
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// m_min.p = pmin.xyww();
// m_max.p = pmax.xyww();
vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// m_min.t = tmin;
// m_max.t = tmax;
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
// Epilogue: restore xmm6/xmm7 and release the stack space.
vmovdqa(xmm6, ptr[rsp + 0]);
vmovdqa(xmm7, ptr[rsp + 16]);
add(rsp, 8 + 2 * 16);
ret();
}
#endif

View File

@ -1,543 +0,0 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSVertexTrace.h"
#if _M_SSE < 0x500 && (defined(_M_AMD64) || defined(_WIN64))
using namespace Xbyak;
// Emits the SSE (x64) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over an array of GSVertexSW.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining vertex count, rdx = current
// vertex, r8 = min output (GSVertexTrace::Vertex), r9 = max output.
// xmm2/xmm3 = colour min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texture min/max.
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	// Decode the specialisation key.

	uint32 primclass = (key >> 0) & 3;
	uint32 iip = (key >> 2) & 1;
	uint32 tme = (key >> 3) & 1;
	uint32 fst = (key >> 4) & 1;
	uint32 color = (key >> 5) & 1;

	// n = number of vertices consumed per loop iteration (one primitive).

	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
	case GS_SPRITE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	}

	// Prologue: reserve stack space and save xmm6/xmm7, which serve as the texture
	// accumulators below. The extra 8 bytes presumably re-align rsp to 16 for the
	// aligned stores - TODO confirm against the calling convention.

	sub(rsp, 8 + 2 * 16);

	movdqa(ptr[rsp + 0], xmm6);
	movdqa(ptr[rsp + 16], xmm7);

	// min.p = FLT_MAX;
	// max.p = -FLT_MAX;
	// The seeds are the dwords at offsets 0 and 4 of s_minmax, splatted to all lanes.

	mov(rax, (size_t)&s_minmax);

	movss(xmm4, ptr[rax + 0]);
	movss(xmm5, ptr[rax + 4]);

	shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
	shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));

	if(color)
	{
		// min.c = FLT_MAX;
		// max.c = -FLT_MAX;

		movaps(xmm2, xmm4);
		movaps(xmm3, xmm5);
	}

	if(tme)
	{
		// min.t = FLT_MAX;
		// max.t = -FLT_MAX;

		movaps(xmm6, xmm4);
		movaps(xmm7, xmm5);
	}

	// for(int i = 0; i < count; i += step) {
	// Note: the count is only tested at the bottom of the loop, so one primitive
	// is always processed.

	align(16);

	L("loop");

	if(tme && !fst && primclass == GS_SPRITE_CLASS)
	{
		// Sprites: the divisor is lane 2 of the second vertex's t, broadcast once per primitive.

		movaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
		shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
	}

	// The per-vertex work is unrolled n times at code-generation time.

	for(int j = 0; j < n; j++)
	{
		// With iip == 0 only the last vertex of the primitive contributes its colour.

		if(color && (iip || j == n - 1))
		{
			// min.c = min.c.minv(v[i + j].c);
			// max.c = max.c.maxv(v[i + j].c);

			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);

			minps(xmm2, xmm0);
			maxps(xmm3, xmm0);
		}

		// min.p = min.p.minv(v[i + j].p);
		// max.p = max.p.maxv(v[i + j].p);

		movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);

		minps(xmm4, xmm0);
		maxps(xmm5, xmm0);

		if(tme)
		{
			// min.t = min.t.minv(v[i + j].t);
			// max.t = max.t.maxv(v[i + j].t);

			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);

			if(!fst)
			{
				if(primclass != GS_SPRITE_CLASS)
				{
					// Non-sprites: the divisor is lane 2 of this vertex's own t.

					movaps(xmm1, xmm0);
					shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
				}

				divps(xmm0, xmm1);

				// Keep the divided lanes 0-1 and take lanes 2-3 from xmm1 (the undivided divisor).

				shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
			}

			minps(xmm6, xmm0);
			maxps(xmm7, xmm0);
		}
	}

	// Advance to the next primitive and loop while vertices remain.
	// The counter is decremented as a 32-bit value (ecx), like the other x64
	// generators: a 64-bit sub on rcx would also test the upper half of the
	// register, which a 32-bit count argument does not define.

	add(rdx, n * sizeof(GSVertexSW));
	sub(ecx, n);

	jg("loop");

	// }

	if(color)
	{
		// Colour accumulators are truncated to integers and shifted right by 7 before being stored.

		cvttps2dq(xmm2, xmm2);
		psrld(xmm2, 7);
		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);

		cvttps2dq(xmm3, xmm3);
		psrld(xmm3, 7);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	// Store position min/max.

	movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tme)
	{
		// Store texture coordinate min/max.

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	// Epilogue: restore xmm6/xmm7 and release the stack space.

	movdqa(xmm6, ptr[rsp + 0]);
	movdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
// Emits the SSE (x64) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over an array of GSVertexHW9.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining vertex count, rdx = current
// vertex, r8 = min output (GSVertexTrace::Vertex), r9 = max output.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	// Decode the specialisation key.

	uint32 primclass = (key >> 0) & 3;
	uint32 iip = (key >> 2) & 1;
	uint32 tme = (key >> 3) & 1;
	uint32 fst = (key >> 4) & 1;
	uint32 color = (key >> 5) & 1;

	// n = number of vertices consumed per loop iteration; sprites take 6 here.

	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	case GS_SPRITE_CLASS:
		n = 6;
		break;
	}

	// Prologue: reserve stack space and save xmm6/xmm7, which serve as the texture
	// accumulators below. The extra 8 bytes presumably re-align rsp to 16 for the
	// aligned stores - TODO confirm against the calling convention.

	sub(rsp, 8 + 2 * 16);

	movdqa(ptr[rsp + 0], xmm6);
	movdqa(ptr[rsp + 16], xmm7);

	// min.p = FLT_MAX;
	// max.p = -FLT_MAX;
	// The max seed is the second dword of s_minmax (offset 4), the same location
	// the other generators read; offset 16 would read past the constant.

	mov(rax, (size_t)&s_minmax);

	movss(xmm4, ptr[rax + 0]);
	movss(xmm5, ptr[rax + 4]);

	shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
	shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));

	if(color)
	{
		// min.c = 0xffffffff;
		// max.c = 0;

		pcmpeqd(xmm2, xmm2);
		pxor(xmm3, xmm3);
	}

	if(tme)
	{
		// min.t = FLT_MAX;
		// max.t = -FLT_MAX;

		movaps(xmm6, xmm4);
		movaps(xmm7, xmm5);
	}

	// for(int i = 0; i < count; i += step) {
	// Note: the count is only tested at the bottom of the loop, so one primitive
	// is always processed.

	align(16);

	L("loop");

	if(tme && !fst && primclass == GS_SPRITE_CLASS)
	{
		// Sprites: the divisor is p.w of the sixth vertex, broadcast once per primitive.

		movaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
		shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
	}

	// The per-vertex work is unrolled n times at code-generation time.

	for(int j = 0; j < n; j++)
	{
		// min.p = min.p.minv(v[i + j].p);
		// max.p = max.p.maxv(v[i + j].p);

		movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);

		minps(xmm4, xmm0);
		maxps(xmm5, xmm0);

		if(tme && !fst && primclass != GS_SPRITE_CLASS)
		{
			// Non-sprites: the divisor is this vertex's own p.w.

			movaps(xmm1, xmm0);
			shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
		}

		// A single 16-byte load at offset t feeds both the colour and the texture paths.

		if(color && (iip || j == n - 1) || tme)
		{
			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
		}

		// With iip == 0 only the last vertex of the primitive contributes its colour.

		if(color && (iip || j == n - 1))
		{
			// min.c = min.c.min_u8(v[i + j].c);
			// max.c = max.c.max_u8(v[i + j].c);
			// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.

			pminub(xmm2, xmm0);
			pmaxub(xmm3, xmm0);
		}

		if(tme)
		{
			shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral

			if(!fst)
			{
				// t /= p.wwww();

				divps(xmm0, xmm1);
			}

			// min.t = min.t.minv(v[i + j].t);
			// max.t = max.t.maxv(v[i + j].t);

			minps(xmm6, xmm0);
			maxps(xmm7, xmm0);
		}
	}

	// Advance to the next primitive and loop while vertices remain.

	add(rdx, n * sizeof(GSVertexHW9));
	sub(ecx, n);

	jg("loop");

	// }

	if(color)
	{
		// m_min.c = cmin.zzzz().u8to32();
		// m_max.c = cmax.zzzz().u8to32();

		if(m_cpu.has(util::Cpu::tSSE41))
		{
			pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
			pmovzxbd(xmm2, xmm2);

			pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
			pmovzxbd(xmm3, xmm3);
		}
		else
		{
			// Without SSE4.1: unpack the high bytes against zero, then the low
			// words against zero, which zero-extends bytes 8..11 to four dwords.

			pxor(xmm0, xmm0);

			punpckhbw(xmm2, xmm0);
			punpcklwd(xmm2, xmm0);

			punpckhbw(xmm3, xmm0);
			punpcklwd(xmm3, xmm0);
		}

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	// m_min.p = pmin;
	// m_max.p = pmax;

	movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tme)
	{
		// m_min.t = tmin.xyww(pmin);
		// m_max.t = tmax.xyww(pmax);
		// Lanes 0-1 come from the texture accumulator, lanes 2-3 are the position accumulator's w.

		shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
		shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	// Epilogue: restore xmm6/xmm7 and release the stack space.

	movdqa(xmm6, ptr[rsp + 0]);
	movdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
// Emits the SSE (x64) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over an array of GSVertexHW11.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining vertex count, rdx = current
// vertex, r8 = min output (GSVertexTrace::Vertex), r9 = max output.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
	: GSCodeGenerator(code, maxsize)
{
	// Decode the specialisation key.

	uint32 primclass = (key >> 0) & 3;
	uint32 iip = (key >> 2) & 1;
	uint32 tme = (key >> 3) & 1;
	uint32 fst = (key >> 4) & 1;
	uint32 color = (key >> 5) & 1;

	// n = number of vertices consumed per loop iteration (one primitive).

	int n = 1;

	switch(primclass)
	{
	case GS_POINT_CLASS:
		n = 1;
		break;
	case GS_LINE_CLASS:
	case GS_SPRITE_CLASS:
		n = 2;
		break;
	case GS_TRIANGLE_CLASS:
		n = 3;
		break;
	}

	// Prologue: reserve stack space and save xmm6/xmm7, which serve as the texture
	// accumulators below. The extra 8 bytes presumably re-align rsp to 16 for the
	// aligned stores - TODO confirm against the calling convention.

	sub(rsp, 8 + 2 * 16);

	movdqa(ptr[rsp + 0], xmm6);
	movdqa(ptr[rsp + 16], xmm7);

	// min.p = FLT_MAX;
	// max.p = -FLT_MAX;
	// The max seed is the second dword of s_minmax (offset 4), the same location
	// the other generators read; offset 16 would read past the constant.

	mov(rax, (size_t)&s_minmax);

	movss(xmm4, ptr[rax + 0]);
	movss(xmm5, ptr[rax + 4]);

	shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
	shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));

	if(color)
	{
		// min.c = 0xffffffff;
		// max.c = 0;

		pcmpeqd(xmm2, xmm2);
		pxor(xmm3, xmm3);
	}

	if(tme)
	{
		// min.t = FLT_MAX;
		// max.t = -FLT_MAX;

		movaps(xmm6, xmm4);
		movaps(xmm7, xmm5);
	}

	// for(int i = 0; i < count; i += step) {
	// Note: the count is only tested at the bottom of the loop, so one primitive
	// is always processed.

	align(16);

	L("loop");

	// The per-vertex work is unrolled n times at code-generation time.

	for(int j = 0; j < n; j++)
	{
		// The first 16 bytes of the vertex feed both the colour and the texture paths.

		if(color && (iip || j == n - 1) || tme)
		{
			movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]);
		}

		// With iip == 0 only the last vertex of the primitive contributes its colour.

		if(color && (iip || j == n - 1))
		{
			// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.

			pminub(xmm2, xmm0);
			pmaxub(xmm3, xmm0);
		}

		if(tme)
		{
			if(!fst)
			{
				// Keep an unshuffled copy so lane 3 can be broadcast as the divisor.

				movaps(xmm1, xmm0);
			}

			shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral

			if(!fst)
			{
				shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
				divps(xmm0, xmm1);
				shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
			}

			minps(xmm6, xmm0);
			maxps(xmm7, xmm0);
		}

		// Position comes from the second 16 bytes of the vertex, stored as integers:
		// lanes 0-1 = its first two 16-bit words zero-extended,
		// lanes 2-3 = its first two dwords shifted right by one; all then converted to float.

		movdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]);

		if(m_cpu.has(util::Cpu::tSSE41))
		{
			pmovzxwd(xmm1, xmm0);
		}
		else
		{
			// Without SSE4.1: duplicate each low word into a dword, then shift
			// the copy in the upper half away to zero-extend.

			movdqa(xmm1, xmm0);
			punpcklwd(xmm1, xmm1);
			psrld(xmm1, 16);
		}

		psrld(xmm0, 1);
		punpcklqdq(xmm1, xmm0);
		cvtdq2ps(xmm1, xmm1);

		minps(xmm4, xmm1);
		maxps(xmm5, xmm1);
	}

	// Advance to the next primitive and loop while vertices remain.

	add(rdx, n * sizeof(GSVertexHW11));
	sub(ecx, n);

	jg("loop");

	// }

	if(color)
	{
		// m_min.c = cmin.zzzz().u8to32();
		// m_max.c = cmax.zzzz().u8to32();

		if(m_cpu.has(util::Cpu::tSSE41))
		{
			pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
			pmovzxbd(xmm2, xmm2);

			pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
			pmovzxbd(xmm3, xmm3);
		}
		else
		{
			// Without SSE4.1: unpack the high bytes against zero, then the low
			// words against zero, which zero-extends bytes 8..11 to four dwords.

			pxor(xmm0, xmm0);

			punpckhbw(xmm2, xmm0);
			punpcklwd(xmm2, xmm0);

			punpckhbw(xmm3, xmm0);
			punpcklwd(xmm3, xmm0);
		}

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
	}

	// m_min.p = pmin.xyww();
	// m_max.p = pmax.xyww();

	shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
	shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));

	movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
	movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);

	if(tme)
	{
		// m_min.t = tmin;
		// m_max.t = tmax;

		movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
		movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
	}

	// Epilogue: restore xmm6/xmm7 and release the stack space.

	movdqa(xmm6, ptr[rsp + 0]);
	movdqa(xmm7, ptr[rsp + 16]);

	add(rsp, 8 + 2 * 16);

	ret();
}
#endif

View File

@ -1,513 +0,0 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSVertexTrace.h"
#if _M_SSE >= 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
using namespace Xbyak;
// Byte offsets from esp at which the generated x86 routines read their
// arguments. Each generator in this file starts with push(ebx), so _args (4)
// accounts for the saved ebx; the further +4 on the first argument presumably
// skips the return address - TODO confirm against the calling convention.
// NOTE(review): the trailing register names presumably name the register that
// carries the same argument in an x64 build; the x64 generators visible in
// this file take no index argument, so verify before relying on them.
static const int _args = 4;
static const int _count = _args + 4; // rcx
static const int _vertex = _args + 8; // rdx
static const int _index = _args + 12; // r8
static const int _min = _args + 16; // r9
static const int _max = _args + 20; // _args + 4
// Emits the AVX (x86) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over indexed GSVertexSW
// vertices.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining index count, edx = vertex
// array base, ebx = current index pointer, eax = byte offset of the current
// vertex. After the loop eax/edx are reloaded with the min/max output pointers.
// xmm2/xmm3 = colour min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texture min/max.
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of indices consumed per loop iteration (one primitive).
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// ebx is used as the index pointer, so it is saved here and restored before ret.
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
vbroadcastss(xmm4, ptr[&s_minmax.x]);
vbroadcastss(xmm5, ptr[&s_minmax.y]);
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
vmovaps(xmm2, xmm4);
vmovaps(xmm3, xmm5);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Load the stack arguments. Note: the count is only tested at the bottom of
// the loop, so one primitive is always processed.
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites: the divisor is lane 2 of t of the vertex referenced by the
// second index, broadcast once per primitive.
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// eax = index[j] scaled to a byte offset into the vertex array.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
vminps(xmm2, xmm0);
vmaxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst)
{
if(primclass != GS_SPRITE_CLASS)
{
// Non-sprites: the divisor is lane 2 of this vertex's own t.
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
}
vdivps(xmm0, xmm1);
// Keep the divided lanes 0-1 and take lanes 2-3 from xmm1 (the undivided divisor).
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
}
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
}
// Advance the index pointer to the next primitive and loop while indices remain.
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
// }
// eax = min output, edx = max output.
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// Colour accumulators are truncated to integers and shifted right by 7 before being stored.
vcvttps2dq(xmm2, xmm2);
vpsrld(xmm2, 7);
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
vcvttps2dq(xmm3, xmm3);
vpsrld(xmm3, 7);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// Store position min/max.
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// Store texture coordinate min/max.
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
// Emits the AVX (x86) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over indexed GSVertexHW9
// vertices.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining index count, edx = vertex
// array base, ebx = current index pointer, eax = byte offset of the current
// vertex. After the loop eax/edx are reloaded with the min/max output pointers.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of indices consumed per loop iteration (one primitive).
// NOTE(review): sprites use n = 2 here, while the non-AVX x86 variant of this
// generator uses n = 6 for sprites - confirm which one is intended.
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_SPRITE_CLASS:
case GS_LINE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// ebx is used as the index pointer, so it is saved here and restored before ret.
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
vbroadcastss(xmm4, ptr[&s_minmax.x]);
vbroadcastss(xmm5, ptr[&s_minmax.y]);
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
vpcmpeqd(xmm2, xmm2);
vpxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Load the stack arguments. Note: the count is only tested at the bottom of
// the loop, so one primitive is always processed.
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites: the divisor is p.w of the vertex referenced by the second
// index, broadcast once per primitive.
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// eax = index[j] scaled to a byte offset into the vertex array.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
vminps(xmm4, xmm0);
vmaxps(xmm5, xmm0);
if(tme && !fst && primclass != GS_SPRITE_CLASS)
{
// Non-sprites: the divisor is this vertex's own p.w.
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
}
// A single 16-byte load at offset t feeds both the colour and the texture paths.
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
}
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// min.c = min.c.min_u8(v[i + j].c);
// max.c = max.c.max_u8(v[i + j].c);
// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.
vpminub(xmm2, xmm0);
vpmaxub(xmm3, xmm0);
}
if(tme)
{
vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
if(!fst)
{
// t /= p.wwww();
vdivps(xmm0, xmm1);
}
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
}
// Advance the index pointer to the next primitive and loop while indices remain.
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
// }
// eax = min output, edx = max output.
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm2, xmm2);
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm3, xmm3);
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// m_min.p = pmin;
// m_max.p = pmax;
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// m_min.t = tmin.xyww(pmin);
// m_max.t = tmax.xyww(pmax);
// Lanes 0-1 come from the texture accumulator, lanes 2-3 are the position accumulator's w.
vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
// Emits the AVX (x86) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over indexed GSVertexHW11
// vertices.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining index count, edx = vertex
// array base, ebx = current index pointer, eax = byte offset of the current
// vertex. After the loop eax/edx are reloaded with the min/max output pointers.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of indices consumed per loop iteration (one primitive).
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// ebx is used as the index pointer, so it is saved here and restored before ret.
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
vbroadcastss(xmm4, ptr[&s_minmax.x]);
vbroadcastss(xmm5, ptr[&s_minmax.y]);
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
vpcmpeqd(xmm2, xmm2);
vpxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
vmovaps(xmm6, xmm4);
vmovaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Load the stack arguments. Note: the count is only tested at the bottom of
// the loop, so one primitive is always processed.
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// eax = index[j] scaled to a byte offset into the vertex array.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
// The first 16 bytes of the vertex feed both the colour and the texture paths.
if(color && (iip || j == n - 1) || tme)
{
vmovaps(xmm0, ptr[edx + eax]);
}
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.
vpminub(xmm2, xmm0);
vpmaxub(xmm3, xmm0);
}
if(tme)
{
if(!fst)
{
// Keep an unshuffled copy so lane 3 can be broadcast as the divisor.
vmovaps(xmm1, xmm0);
}
vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
if(!fst)
{
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
vdivps(xmm0, xmm1);
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
}
vminps(xmm6, xmm0);
vmaxps(xmm7, xmm0);
}
// Position comes from the second 16 bytes of the vertex, stored as integers:
// lanes 0-1 = its first two 16-bit words zero-extended,
// lanes 2-3 = its first two dwords shifted right by one; all then converted to float.
vmovdqa(xmm0, ptr[edx + eax + 16]);
vpmovzxwd(xmm1, xmm0);
vpsrld(xmm0, 1);
vpunpcklqdq(xmm1, xmm0);
vcvtdq2ps(xmm1, xmm1);
vminps(xmm4, xmm1);
vmaxps(xmm5, xmm1);
}
// Advance the index pointer to the next primitive and loop while indices remain.
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
// }
// eax = min output, edx = max output.
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm2, xmm2);
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
vpmovzxbd(xmm3, xmm3);
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// m_min.p = pmin.xyww();
// m_max.p = pmax.xyww();
vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// m_min.t = tmin;
// m_max.t = tmax;
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
#endif

View File

@ -1,562 +0,0 @@
/*
* Copyright (C) 2007-2009 Gabest
* http://www.gabest.org
*
* This Program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2, or (at your option)
* any later version.
*
* This Program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with GNU Make; see the file COPYING. If not, write to
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
* http://www.gnu.org/copyleft/gpl.html
*
*/
#include "stdafx.h"
#include "GSVertexTrace.h"
#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
using namespace Xbyak;
// Byte offsets from esp at which the generated x86 routines read their
// arguments. Each generator in this file starts with push(ebx), so _args (4)
// accounts for the saved ebx; the further +4 on the first argument presumably
// skips the return address - TODO confirm against the calling convention.
// NOTE(review): the trailing register names presumably name the register that
// carries the same argument in an x64 build; the x64 generators visible in
// this file take no index argument, so verify before relying on them.
static const int _args = 4;
static const int _count = _args + 4; // rcx
static const int _vertex = _args + 8; // rdx
static const int _index = _args + 12; // r8
static const int _min = _args + 16; // r9
static const int _max = _args + 20; // _args + 4
// Emits the SSE (x86) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over indexed GSVertexSW
// vertices.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining index count, edx = vertex
// array base, ebx = current index pointer, eax = byte offset of the current
// vertex. After the loop eax/edx are reloaded with the min/max output pointers.
// xmm2/xmm3 = colour min/max, xmm4/xmm5 = position min/max, xmm6/xmm7 = texture min/max.
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of indices consumed per loop iteration (one primitive).
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// ebx is used as the index pointer, so it is saved here and restored before ret.
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
// Load the scalar seeds, then splat lane 0 to all four lanes.
movss(xmm4, ptr[&s_minmax.x]);
movss(xmm5, ptr[&s_minmax.y]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = FLT_MAX;
// max.c = -FLT_MAX;
movaps(xmm2, xmm4);
movaps(xmm3, xmm5);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm4);
movaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Load the stack arguments. Note: the count is only tested at the bottom of
// the loop, so one primitive is always processed.
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites: the divisor is lane 2 of t of the vertex referenced by the
// second index, broadcast once per primitive.
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// eax = index[j] scaled to a byte offset into the vertex array.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 6); // * sizeof(GSVertexSW)
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// min.c = min.c.minv(v[i + j].c);
// max.c = max.c.maxv(v[i + j].c);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
minps(xmm2, xmm0);
maxps(xmm3, xmm0);
}
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
if(tme)
{
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
if(!fst)
{
if(primclass != GS_SPRITE_CLASS)
{
// Non-sprites: the divisor is lane 2 of this vertex's own t.
movaps(xmm1, xmm0);
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
}
divps(xmm0, xmm1);
// Keep the divided lanes 0-1 and take lanes 2-3 from xmm1 (the undivided divisor).
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
}
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
}
// Advance the index pointer to the next primitive and loop while indices remain.
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
// }
// eax = min output, edx = max output.
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// Colour accumulators are truncated to integers and shifted right by 7 before being stored.
cvttps2dq(xmm2, xmm2);
psrld(xmm2, 7);
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
cvttps2dq(xmm3, xmm3);
psrld(xmm3, 7);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// Store position min/max.
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// Store texture coordinate min/max.
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
// Emits the SSE (x86) routine that computes the per-component minimum and
// maximum of position, texture coordinate and colour over indexed GSVertexHW9
// vertices.
//
// param   - not referenced by this generator.
// key     - specialisation bits: [1:0] primclass, [2] iip, [3] tme, [4] fst, [5] color.
// code    - buffer that receives the machine code (forwarded to GSCodeGenerator).
// maxsize - forwarded to GSCodeGenerator together with code.
//
// Register use in the emitted code: ecx = remaining index count, edx = vertex
// array base, ebx = current index pointer, eax = byte offset of the current
// vertex. After the loop eax/edx are reloaded with the min/max output pointers.
// xmm2/xmm3 = colour min/max (bytes), xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texture min/max.
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// Decode the specialisation key.
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of indices consumed per loop iteration (one primitive).
// NOTE(review): sprites use n = 6 here, while the AVX x86 variant of this
// generator uses n = 2 for sprites - confirm which one is intended.
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
case GS_SPRITE_CLASS:
n = 6;
break;
}
// ebx is used as the index pointer, so it is saved here and restored before ret.
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
// Load the scalar seeds, then splat lane 0 to all four lanes.
movss(xmm4, ptr[&s_minmax.x]);
movss(xmm5, ptr[&s_minmax.y]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
pcmpeqd(xmm2, xmm2);
pxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm4);
movaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// Load the stack arguments. Note: the count is only tested at the bottom of
// the loop, so one primitive is always processed.
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
if(tme && !fst && primclass == GS_SPRITE_CLASS)
{
// Sprites: the divisor is p.w of the vertex referenced by the second
// index, broadcast once per primitive.
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
// The per-vertex work is unrolled n times at code-generation time.
for(int j = 0; j < n; j++)
{
// eax = index[j] scaled to a byte offset into the vertex array.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW9)
// min.p = min.p.minv(v[i + j].p);
// max.p = max.p.maxv(v[i + j].p);
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
minps(xmm4, xmm0);
maxps(xmm5, xmm0);
if(tme && !fst && primclass != GS_SPRITE_CLASS)
{
// Non-sprites: the divisor is this vertex's own p.w.
movaps(xmm1, xmm0);
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
}
// A single 16-byte load at offset t feeds both the colour and the texture paths.
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
}
// With iip == 0 only the last vertex of the primitive contributes its colour.
if(color && (iip || j == n - 1))
{
// min.c = min.c.min_u8(v[i + j].c);
// max.c = max.c.max_u8(v[i + j].c);
// Byte-wise over all 16 bytes; only dword 2 is kept when the result is stored.
pminub(xmm2, xmm0);
pmaxub(xmm3, xmm0);
}
if(tme)
{
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
if(!fst)
{
// t /= p.wwww();
divps(xmm0, xmm1);
}
// min.t = min.t.minv(v[i + j].t);
// max.t = max.t.maxv(v[i + j].t);
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
}
// Advance the index pointer to the next primitive and loop while indices remain.
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
// }
// eax = min output, edx = max output.
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
if(m_cpu.has(util::Cpu::tSSE41))
{
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm2, xmm2);
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm3, xmm3);
}
else
{
// Without SSE4.1: unpack the high bytes against zero, then the low words
// against zero, which zero-extends bytes 8..11 to four dwords.
pxor(xmm0, xmm0);
punpckhbw(xmm2, xmm0);
punpcklwd(xmm2, xmm0);
punpckhbw(xmm3, xmm0);
punpcklwd(xmm3, xmm0);
}
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// m_min.p = pmin;
// m_max.p = pmax;
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// m_min.t = tmin.xyww(pmin);
// m_max.t = tmax.xyww(pmax);
// Lanes 0-1 come from the texture accumulator, lanes 2-3 are the position accumulator's w.
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
// Emits (via the GSCodeGenerator base) an x86 routine that walks an index
// list and accumulates per-component minimum/maximum values of the referenced
// 32-byte vertices, then stores them into two GSVertexTrace::Vertex objects.
//
// key selects which variant is generated:
//   bits 0-1  primclass - decides how many indices are consumed per iteration
//   bit  2    iip       - when clear, only the last vertex of each primitive
//                         contributes to the color range
//   bit  3    tme       - when set, the texture coordinate range is tracked
//   bit  4    fst       - when clear (and tme set), s/t are divided by q
//   bit  5    color     - when set, the color range is tracked
// code/maxsize are forwarded to GSCodeGenerator; param is not used in this body.
//
// Register use inside the generated code:
//   edx = vertex base, ebx = current index pointer, ecx = remaining count,
//   eax = scratch (byte offset of the current vertex, later the min pointer),
//   xmm0/xmm1 = scratch, xmm2/xmm3 = color min/max,
//   xmm4/xmm5 = position min/max, xmm6/xmm7 = texcoord min/max.
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
: GSCodeGenerator(code, maxsize)
{
// unpack the variant selector bits
uint32 primclass = (key >> 0) & 3;
uint32 iip = (key >> 2) & 1;
uint32 tme = (key >> 3) & 1;
uint32 fst = (key >> 4) & 1;
uint32 color = (key >> 5) & 1;
// n = number of indices (vertices) processed per loop iteration
int n = 1;
switch(primclass)
{
case GS_POINT_CLASS:
n = 1;
break;
case GS_LINE_CLASS:
case GS_SPRITE_CLASS:
n = 2;
break;
case GS_TRIANGLE_CLASS:
n = 3;
break;
}
// ebx is used as the index pointer below; it is saved here and restored before ret
push(ebx);
// min.p = FLT_MAX;
// max.p = -FLT_MAX;
// (s_minmax is defined elsewhere; its x/y are broadcast to all four lanes)
movss(xmm4, ptr[&s_minmax.x]);
movss(xmm5, ptr[&s_minmax.y]);
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
if(color)
{
// min.c = 0xffffffff;
// max.c = 0;
pcmpeqd(xmm2, xmm2);
pxor(xmm3, xmm3);
}
if(tme)
{
// min.t = FLT_MAX;
// max.t = -FLT_MAX;
movaps(xmm6, xmm4);
movaps(xmm7, xmm5);
}
// for(int i = 0; i < count; i += step) {
// _vertex/_index/_count are stack offsets defined elsewhere
// (presumably the argument slots, adjusted for the push above - TODO confirm)
mov(edx, dword[esp + _vertex]);
mov(ebx, dword[esp + _index]);
mov(ecx, dword[esp + _count]);
align(16);
L("loop");
// the body is unrolled n times, once per vertex of the primitive
for(int j = 0; j < n; j++)
{
// eax = index[j] * 32 = byte offset of the vertex
mov(eax, ptr[ebx + j * sizeof(uint32)]);
shl(eax, 5); // * sizeof(GSVertexHW11)
// load the first 16 bytes of the vertex only if color or texcoords need them
// (&& binds tighter than ||, so this reads: (color && (...)) || tme)
if(color && (iip || j == n - 1) || tme)
{
movaps(xmm0, ptr[edx + eax]);
}
if(color && (iip || j == n - 1))
{
// unsigned byte-wise min/max over all 16 bytes; only dword 2 is
// extracted after the loop, the other lanes are ignored
pminub(xmm2, xmm0);
pmaxub(xmm3, xmm0);
}
if(tme)
{
if(!fst)
{
// keep an untouched copy so lane 3 can be broadcast as the divisor
movaps(xmm1, xmm0);
}
// xmm0 = lanes (0, 1, 3, 3): lane 2 is replaced by a copy of lane 3
shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
if(!fst)
{
// xmm1 = lane 3 broadcast; divide every lane of xmm0 by it, then
// put the undivided lane 3 value back into lanes 2 and 3
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
divps(xmm0, xmm1);
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
}
minps(xmm6, xmm0);
maxps(xmm7, xmm0);
}
// position: second 16 bytes of the vertex, converted from integers to float
movdqa(xmm0, ptr[edx + eax + 16]);
// xmm1 = low four 16-bit words zero-extended to dwords (only the first two are kept)
if(m_cpu.has(util::Cpu::tSSE41))
{
pmovzxwd(xmm1, xmm0);
}
else
{
// same result without SSE4.1: duplicate each word, then shift the copy down
movdqa(xmm1, xmm0);
punpcklwd(xmm1, xmm1);
psrld(xmm1, 16);
}
// halve every dword of xmm0, then build xmm1 = (word0, word1, dword0 >> 1, dword1 >> 1)
// NOTE(review): the shift presumably keeps a full-range unsigned 32-bit value
// non-negative for the signed cvtdq2ps conversion - confirm
psrld(xmm0, 1);
punpcklqdq(xmm1, xmm0);
cvtdq2ps(xmm1, xmm1);
minps(xmm4, xmm1);
maxps(xmm5, xmm1);
}
// advance past the n indices just consumed; loop while the remaining count is > 0
// (the test is at the bottom, so the body is emitted to run at least once)
add(ebx, n * sizeof(uint32));
sub(ecx, n);
jg("loop");
// }
// eax/edx now point at the output min/max objects (edx no longer holds the vertex base)
mov(eax, dword[esp + _min]);
mov(edx, dword[esp + _max]);
if(color)
{
// m_min.c = cmin.zzzz().u8to32();
// m_max.c = cmax.zzzz().u8to32();
if(m_cpu.has(util::Cpu::tSSE41))
{
// broadcast dword 2, then zero-extend its four bytes to four dwords
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm2, xmm2);
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
pmovzxbd(xmm3, xmm3);
}
else
{
// same result without SSE4.1: interleave the high 8 bytes with zero,
// then the low 4 resulting words (the bytes of dword 2) with zero
pxor(xmm0, xmm0);
punpckhbw(xmm2, xmm0);
punpcklwd(xmm2, xmm0);
punpckhbw(xmm3, xmm0);
punpcklwd(xmm3, xmm0);
}
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
}
// m_min.p = pmin.xyww();
// m_max.p = pmax.xyww();
// (lane 2 held the halved packed x/y dword and is discarded here)
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
if(tme)
{
// m_min.t = tmin;
// m_max.t = tmax;
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
}
pop(ebx);
ret();
}
#endif

View File

@ -618,62 +618,6 @@
<ClCompile Include="GSVertexList.cpp" />
<ClCompile Include="GSVertexSW.cpp" />
<ClCompile Include="GSVertexTrace.cpp" />
<ClCompile Include="GSVertexTrace.x64.avx.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSVertexTrace.x64.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSVertexTrace.x86.avx.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSVertexTrace.x86.cpp">
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
</ClCompile>
<ClCompile Include="GSWnd.cpp" />
<ClCompile Include="stdafx.cpp">
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">Create</PrecompiledHeader>

View File

@ -288,18 +288,6 @@
<ClCompile Include="GSDeviceSDL.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GSVertexTrace.x64.avx.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GSVertexTrace.x64.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GSVertexTrace.x86.avx.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GSVertexTrace.x86.cpp">
<Filter>Source Files</Filter>
</ClCompile>
<ClCompile Include="GSSetupPrimCodeGenerator.x64.avx.cpp">
<Filter>Source Files</Filter>
</ClCompile>

View File

@ -1244,110 +1244,6 @@
RelativePath=".\GSVertexTrace.cpp"
>
</File>
<File
RelativePath=".\GSVertexTrace.x64.cpp"
>
<FileConfiguration
Name="Debug SSE2|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSE2|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSSE3|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug SSSE3|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug SSE4|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSE4|Win32"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\GSVertexTrace.x86.cpp"
>
<FileConfiguration
Name="Debug SSE2|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSE2|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSSE3|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug SSSE3|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Debug SSE4|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
<FileConfiguration
Name="Release SSE4|x64"
ExcludedFromBuild="true"
>
<Tool
Name="VCCLCompilerTool"
/>
</FileConfiguration>
</File>
<File
RelativePath=".\GSWnd.cpp"
>

View File

@ -40,11 +40,12 @@
// Vertex shader input layout.
// NOTE(review): this listing declares q and c twice and binds both st and t to
// TEXCOORD0; it looks like the removed and added lines of a diff hunk shown
// together without +/- markers, not one compilable struct - confirm against the
// actual shader file before relying on the member order.
struct VS_INPUT
{
float2 st : TEXCOORD0;
float4 c : COLOR0;
float q : TEXCOORD1;
uint2 p : POSITION0;
uint z : POSITION1;
float2 t : TEXCOORD0;
float q : TEXCOORD1;
float4 c : COLOR0;
uint2 uv : TEXCOORD2;
float4 f : COLOR1;
};
@ -602,12 +603,12 @@ VS_OUTPUT vs_main(VS_INPUT input)
{
if(VS_FST)
{
output.t.xy = input.t * TextureScale;
output.t.xy = input.uv * TextureScale;
output.t.w = 1.0f;
}
else
{
output.t.xy = input.t;
output.t.xy = input.st;
output.t.w = input.q;
}
}