mirror of https://github.com/PCSX2/pcsx2.git
GSdx: Simplified vertex formats and the related code, everything works with the basic GSVertex until it gets uploaded to the vertex buffer.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5074 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
d763bf6616
commit
9ec7f14fa8
|
@ -1103,13 +1103,13 @@ __aligned(struct, 32) GIFPath
|
|||
|
||||
enum {TYPE_UNKNOWN, TYPE_ADONLY, TYPE_STQRGBAXYZF2};
|
||||
|
||||
void SetTag(const void* mem)
|
||||
__forceinline void SetTag(const void* mem)
|
||||
{
|
||||
GSVector4i v = GSVector4i::load<false>(mem);
|
||||
GSVector4i::store<true>(&tag, v);
|
||||
reg = 0;
|
||||
regs = v.uph8(v >> 4) & 0x0f0f0f0f;
|
||||
nreg = tag.NREG ? tag.NREG : 16;
|
||||
regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg);
|
||||
nloop = tag.NLOOP;
|
||||
type = TYPE_UNKNOWN;
|
||||
if(regs.u32[0] == 0x00040102 && nreg == 3) type = TYPE_STQRGBAXYZF2;
|
||||
|
|
|
@ -720,6 +720,18 @@ void GSDevice11::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vert
|
|||
}
|
||||
|
||||
void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
||||
{
|
||||
void* ptr = NULL;
|
||||
|
||||
if(IAMapVertexBuffer(&ptr, stride, count))
|
||||
{
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
IAUnmapVertexBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
bool GSDevice11::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
|
||||
{
|
||||
ASSERT(m_vertex.count == 0);
|
||||
|
||||
|
@ -747,7 +759,7 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
|
||||
hr = m_dev->CreateBuffer(&bd, NULL, &m_vb);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
if(FAILED(hr)) return false;
|
||||
}
|
||||
|
||||
D3D11_MAP type = D3D11_MAP_WRITE_NO_OVERWRITE;
|
||||
|
@ -761,17 +773,24 @@ void GSDevice11::IASetVertexBuffer(const void* vertex, size_t stride, size_t cou
|
|||
|
||||
D3D11_MAPPED_SUBRESOURCE m;
|
||||
|
||||
if(SUCCEEDED(m_ctx->Map(m_vb, 0, type, 0, &m)))
|
||||
if(FAILED(m_ctx->Map(m_vb, 0, type, 0, &m)))
|
||||
{
|
||||
GSVector4i::storent((uint8*)m.pData + m_vertex.start * stride, vertex, count * stride);
|
||||
|
||||
m_ctx->Unmap(m_vb, 0);
|
||||
return false;
|
||||
}
|
||||
|
||||
*vertex = (uint8*)m.pData + m_vertex.start * stride;
|
||||
|
||||
m_vertex.count = count;
|
||||
m_vertex.stride = stride;
|
||||
|
||||
IASetVertexBuffer(m_vb, stride);
|
||||
return true;
|
||||
}
|
||||
|
||||
void GSDevice11::IAUnmapVertexBuffer()
|
||||
{
|
||||
m_ctx->Unmap(m_vb, 0);
|
||||
|
||||
IASetVertexBuffer(m_vb, m_vertex.stride);
|
||||
}
|
||||
|
||||
void GSDevice11::IASetVertexBuffer(ID3D11Buffer* vb, size_t stride)
|
||||
|
|
|
@ -170,6 +170,8 @@ public:
|
|||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, ID3D11PixelShader* ps, ID3D11Buffer* ps_cb, ID3D11BlendState* bs, bool linear = true);
|
||||
|
||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||
bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count);
|
||||
void IAUnmapVertexBuffer();
|
||||
void IASetVertexBuffer(ID3D11Buffer* vb, size_t stride);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetIndexBuffer(ID3D11Buffer* ib);
|
||||
|
@ -190,7 +192,6 @@ public:
|
|||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||
void OMSetRenderTargets(const GSVector2i& rtsize, ID3D11UnorderedAccessView** uav, int count, const GSVector4i* scissor = NULL);
|
||||
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel);
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
|
|
|
@ -911,6 +911,18 @@ void GSDevice9::SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* verti
|
|||
}
|
||||
|
||||
void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t count)
|
||||
{
|
||||
void* ptr = NULL;
|
||||
|
||||
if(IAMapVertexBuffer(&ptr, stride, count))
|
||||
{
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
IAUnmapVertexBuffer();
|
||||
}
|
||||
}
|
||||
|
||||
bool GSDevice9::IAMapVertexBuffer(void** vertex, size_t stride, size_t count)
|
||||
{
|
||||
ASSERT(m_vertex.count == 0);
|
||||
|
||||
|
@ -930,7 +942,7 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun
|
|||
|
||||
hr = m_dev->CreateVertexBuffer(m_vertex.limit * stride, D3DUSAGE_DYNAMIC | D3DUSAGE_WRITEONLY, 0, D3DPOOL_DEFAULT, &m_vb, NULL);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
if(FAILED(hr)) return false;
|
||||
}
|
||||
|
||||
uint32 flags = D3DLOCK_NOOVERWRITE;
|
||||
|
@ -942,19 +954,22 @@ void GSDevice9::IASetVertexBuffer(const void* vertex, size_t stride, size_t coun
|
|||
flags = D3DLOCK_DISCARD;
|
||||
}
|
||||
|
||||
void* ptr = NULL;
|
||||
|
||||
if(SUCCEEDED(m_vb->Lock(m_vertex.start * stride, count * stride, &ptr, flags)))
|
||||
if(FAILED(m_vb->Lock(m_vertex.start * stride, count * stride, vertex, flags)))
|
||||
{
|
||||
GSVector4i::storent(ptr, vertex, count * stride);
|
||||
|
||||
m_vb->Unlock();
|
||||
return false;
|
||||
}
|
||||
|
||||
m_vertex.count = count;
|
||||
m_vertex.stride = stride;
|
||||
|
||||
IASetVertexBuffer(m_vb, stride);
|
||||
return true;
|
||||
}
|
||||
|
||||
void GSDevice9::IAUnmapVertexBuffer()
|
||||
{
|
||||
m_vb->Unlock();
|
||||
|
||||
IASetVertexBuffer(m_vb, m_vertex.stride);
|
||||
}
|
||||
|
||||
void GSDevice9::IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride)
|
||||
|
|
|
@ -196,6 +196,8 @@ public:
|
|||
void StretchRect(GSTexture* st, const GSVector4& sr, GSTexture* dt, const GSVector4& dr, IDirect3DPixelShader9* ps, const float* ps_cb, int ps_cb_len, Direct3DBlendState9* bs, bool linear = true);
|
||||
|
||||
void IASetVertexBuffer(const void* vertex, size_t stride, size_t count);
|
||||
bool IAMapVertexBuffer(void** vertex, size_t stride, size_t count);
|
||||
void IAUnmapVertexBuffer();
|
||||
void IASetVertexBuffer(IDirect3DVertexBuffer9* vb, size_t stride);
|
||||
void IASetIndexBuffer(const void* index, size_t count);
|
||||
void IASetIndexBuffer(IDirect3DIndexBuffer9* ib);
|
||||
|
@ -216,7 +218,6 @@ public:
|
|||
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DVertexShader9** vs, const D3DVERTEXELEMENT9* layout, int count, IDirect3DVertexDeclaration9** il);
|
||||
HRESULT CompileShader(uint32 id, const string& entry, const D3DXMACRO* macro, IDirect3DPixelShader9** ps);
|
||||
|
||||
void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim);
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel) {}
|
||||
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
|
||||
|
|
|
@ -279,7 +279,6 @@ public:
|
|||
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
|
||||
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;}
|
||||
|
||||
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
|
||||
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
|
||||
virtual void SetupGS(GSSelector sel) = 0;
|
||||
virtual void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel) = 0;
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRenderer.h"
|
||||
|
||||
GSRenderer::GSRenderer(GSVertexTrace* vt, size_t vertex_stride)
|
||||
: GSState(vt, vertex_stride)
|
||||
, m_dev(NULL)
|
||||
GSRenderer::GSRenderer()
|
||||
: m_dev(NULL)
|
||||
, m_shader(0)
|
||||
{
|
||||
m_GStitleInfoBuffer[0] = 0;
|
||||
|
|
|
@ -58,7 +58,7 @@ public:
|
|||
int s_saven;
|
||||
|
||||
public:
|
||||
GSRenderer(GSVertexTrace* vt, size_t vertex_stride);
|
||||
GSRenderer();
|
||||
virtual ~GSRenderer();
|
||||
|
||||
virtual bool CreateWnd(const string& title, int w, int h);
|
||||
|
|
|
@ -23,12 +23,10 @@
|
|||
#include "GSRendererCS.h"
|
||||
|
||||
GSRendererCS::GSRendererCS()
|
||||
: GSRenderer(new GSVertexTraceDX11(this), sizeof(GSVertexHW11))
|
||||
: GSRenderer()
|
||||
{
|
||||
m_nativeres = true;
|
||||
|
||||
InitConvertVertex(GSRendererCS);
|
||||
|
||||
memset(m_vm_valid, 0, sizeof(m_vm_valid));
|
||||
}
|
||||
|
||||
|
@ -233,26 +231,6 @@ GSTexture* GSRendererCS::GetOutput(int i)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
|
||||
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
|
||||
|
||||
GSVector4i v0 = ((GSVector4i*)s)[0];
|
||||
GSVector4i v1 = ((GSVector4i*)s)[1];
|
||||
|
||||
if(tme && fst)
|
||||
{
|
||||
// TODO: modify VertexTrace to read uv from v1.u16[0], v1.u16[1], then this step is not needed
|
||||
|
||||
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
|
||||
}
|
||||
|
||||
((GSVector4i*)d)[0] = v0;
|
||||
((GSVector4i*)d)[1] = v1;
|
||||
}
|
||||
|
||||
void GSRendererCS::Draw()
|
||||
{
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
|
@ -260,7 +238,7 @@ void GSRendererCS::Draw()
|
|||
|
||||
GSVector2i rtsize(2048, 2048);
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
|
||||
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
|
@ -286,7 +264,7 @@ void GSRendererCS::Draw()
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
// TODO: unswizzle pages of r to a texture, check m_vm_valid, bit not set cpu->gpu, set gpu->gpu
|
||||
|
||||
|
@ -317,7 +295,7 @@ void GSRendererCS::Draw()
|
|||
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology;
|
||||
|
||||
switch(m_vt->m_primclass)
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
|
@ -333,7 +311,7 @@ void GSRendererCS::Draw()
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertexHW11), m_vertex.next);
|
||||
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next);
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
|
||||
|
@ -407,7 +385,7 @@ void GSRendererCS::Draw()
|
|||
GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
gs_sel.prim = m_vt->m_primclass;
|
||||
gs_sel.prim = m_vt.m_primclass;
|
||||
|
||||
CComPtr<ID3D11GeometryShader> gs;
|
||||
|
||||
|
|
|
@ -120,9 +120,6 @@ class GSRendererCS : public GSRenderer
|
|||
bool GetOffsetBuffer(OffsetBuffer** fzbo);
|
||||
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
void VSync(int field);
|
||||
GSTexture* GetOutput(int i);
|
||||
|
|
|
@ -23,10 +23,9 @@
|
|||
#include "GSRendererDX.h"
|
||||
#include "GSDeviceDX.h"
|
||||
|
||||
GSRendererDX::GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter)
|
||||
: GSRendererHW(vt, vertex_stride, tc)
|
||||
GSRendererDX::GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter)
|
||||
: GSRendererHW(tc)
|
||||
, m_pixelcenter(pixelcenter)
|
||||
, m_topology(-1)
|
||||
{
|
||||
m_logz = !!theApp.GetConfig("logz", 0);
|
||||
m_fba = !!theApp.GetConfig("fba", 1);
|
||||
|
@ -61,7 +60,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
GSVector4 s = GSVector4(rtscale.x / rtsize.x, rtscale.y / rtsize.y);
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
|
@ -111,7 +110,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
|
||||
if(!IsOpaque())
|
||||
{
|
||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt->m_primclass == GS_LINE_CLASS;
|
||||
om_bsel.abe = PRIM->ABE || PRIM->AA1 && m_vt.m_primclass == GS_LINE_CLASS;
|
||||
|
||||
om_bsel.a = context->ALPHA.A;
|
||||
om_bsel.b = context->ALPHA.B;
|
||||
|
@ -154,11 +153,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
{
|
||||
if(context->ZBUF.PSM == PSM_PSMZ24)
|
||||
{
|
||||
if(m_vt->m_max.p.z > 0xffffff)
|
||||
if(m_vt.m_max.p.z > 0xffffff)
|
||||
{
|
||||
ASSERT(m_vt->m_min.p.z > 0xffffff);
|
||||
ASSERT(m_vt.m_min.p.z > 0xffffff);
|
||||
// Fixme :Following conditional fixes some dialog frame in Wild Arms 3, but may not be what was intended.
|
||||
if (m_vt->m_min.p.z > 0xffffff)
|
||||
if (m_vt.m_min.p.z > 0xffffff)
|
||||
{
|
||||
vs_sel.bppz = 1;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
|
@ -167,11 +166,11 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
}
|
||||
else if(context->ZBUF.PSM == PSM_PSMZ16 || context->ZBUF.PSM == PSM_PSMZ16S)
|
||||
{
|
||||
if(m_vt->m_max.p.z > 0xffff)
|
||||
if(m_vt.m_max.p.z > 0xffff)
|
||||
{
|
||||
ASSERT(m_vt->m_min.p.z > 0xffff); // sfex capcom logo
|
||||
ASSERT(m_vt.m_min.p.z > 0xffff); // sfex capcom logo
|
||||
// Fixme : Same as above, I guess.
|
||||
if (m_vt->m_min.p.z > 0xffff)
|
||||
if (m_vt.m_min.p.z > 0xffff)
|
||||
{
|
||||
vs_sel.bppz = 2;
|
||||
om_dssel.ztst = ZTST_ALWAYS;
|
||||
|
@ -213,7 +212,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
GSDeviceDX::GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
gs_sel.prim = m_vt->m_primclass;
|
||||
gs_sel.prim = m_vt.m_primclass;
|
||||
|
||||
// ps
|
||||
|
||||
|
@ -281,7 +280,7 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
ps_sel.aem = env.TEXA.AEM;
|
||||
ps_sel.tfx = context->TEX0.TFX;
|
||||
ps_sel.tcc = context->TEX0.TCC;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt->IsLinear() : m_filter;
|
||||
ps_sel.ltf = m_filter == 2 ? m_vt.IsLinear() : m_filter;
|
||||
ps_sel.rt = tex->m_target;
|
||||
|
||||
int w = tex->m_texture->GetWidth();
|
||||
|
@ -331,7 +330,6 @@ void GSRendererDX::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sourc
|
|||
uint8 afix = context->ALPHA.FIX;
|
||||
|
||||
dev->SetupOM(om_dssel, om_bsel, afix);
|
||||
dev->SetupIA(m_vertex.buff, m_vertex.next, m_index.buff, m_index.tail, m_topology);
|
||||
dev->SetupVS(vs_sel, &vs_cb);
|
||||
dev->SetupGS(gs_sel);
|
||||
dev->SetupPS(ps_sel, &ps_cb, ps_ssel);
|
||||
|
|
|
@ -32,13 +32,11 @@ class GSRendererDX : public GSRendererHW
|
|||
bool UserHacks_AlphaHack;
|
||||
|
||||
protected:
|
||||
int m_topology;
|
||||
|
||||
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
virtual void UpdateFBA(GSTexture* rt) {}
|
||||
|
||||
public:
|
||||
GSRendererDX(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
|
||||
GSRendererDX(GSTextureCache* tc, const GSVector2& pixelcenter = GSVector2(0, 0));
|
||||
virtual ~GSRendererDX();
|
||||
|
||||
};
|
||||
|
|
|
@ -25,9 +25,8 @@
|
|||
#include "resource.h"
|
||||
|
||||
GSRendererDX11::GSRendererDX11()
|
||||
: GSRendererDX(new GSVertexTraceDX11(this), sizeof(GSVertexHW11), new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
|
||||
: GSRendererDX(new GSTextureCache11(this), GSVector2(-0.5f, -0.5f))
|
||||
{
|
||||
InitConvertVertex(GSRendererDX11);
|
||||
}
|
||||
|
||||
bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
||||
|
@ -38,43 +37,40 @@ bool GSRendererDX11::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX11::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
GSVertex* s = (GSVertex*)((GSVertexHW11*)m_vertex.buff + src_index);
|
||||
GSVertexHW11* d = (GSVertexHW11*)m_vertex.buff + dst_index;
|
||||
|
||||
GSVector4i v0 = ((GSVector4i*)s)[0];
|
||||
GSVector4i v1 = ((GSVector4i*)s)[1];
|
||||
|
||||
if(tme && fst)
|
||||
{
|
||||
// TODO: modify VertexTrace and the shaders to read uv from v1.u16[0], v1.u16[1], then this step is not needed
|
||||
|
||||
v0 = GSVector4i::cast(GSVector4(v1.uph16()).xyzw(GSVector4::cast(v0))); // uv => st
|
||||
}
|
||||
|
||||
((GSVector4i*)d)[0] = v0;
|
||||
((GSVector4i*)d)[1] = v1;
|
||||
}
|
||||
|
||||
void GSRendererDX11::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
switch(m_vt->m_primclass)
|
||||
GSDevice11* dev = (GSDevice11*)m_dev;
|
||||
|
||||
void* ptr = NULL;
|
||||
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertex), m_vertex.next))
|
||||
{
|
||||
GSVector4i::storent(ptr, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
||||
dev->IAUnmapVertexBuffer();
|
||||
}
|
||||
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
|
||||
D3D11_PRIMITIVE_TOPOLOGY t;
|
||||
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
t = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
t = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
m_topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
t = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
dev->IASetPrimitiveTopology(t);
|
||||
|
||||
__super::DrawPrims(rt, ds, tex);
|
||||
}
|
||||
|
|
|
@ -28,15 +28,8 @@
|
|||
class GSRendererDX11 : public GSRendererDX
|
||||
{
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
|
||||
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.x;}
|
||||
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW11*)vertex)->p.y;}
|
||||
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW11*)vertex)->c0;}
|
||||
void SetColor(void* vertex, uint32 c) const {((GSVertexHW11*)vertex)->c0 = c;}
|
||||
|
||||
public:
|
||||
GSRendererDX11();
|
||||
virtual ~GSRendererDX11() {}
|
||||
|
|
|
@ -25,9 +25,8 @@
|
|||
#include "resource.h"
|
||||
|
||||
GSRendererDX9::GSRendererDX9()
|
||||
: GSRendererDX(new GSVertexTraceDX9(this), sizeof(GSVertexHW9), new GSTextureCache9(this))
|
||||
: GSRendererDX(new GSTextureCache9(this))
|
||||
{
|
||||
InitConvertVertex(GSRendererDX9);
|
||||
}
|
||||
|
||||
bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
||||
|
@ -57,56 +56,21 @@ bool GSRendererDX9::CreateDevice(GSDevice* dev)
|
|||
return true;
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererDX9::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
GSVertex* s = (GSVertex*)((GSVertexHW9*)m_vertex.buff + src_index);
|
||||
GSVertexHW9* d = (GSVertexHW9*)m_vertex.buff + dst_index;
|
||||
|
||||
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
|
||||
|
||||
if(tme && !fst)
|
||||
{
|
||||
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
|
||||
}
|
||||
else
|
||||
{
|
||||
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
|
||||
}
|
||||
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(fst)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16());
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4::loadl(&s->ST);
|
||||
}
|
||||
}
|
||||
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
|
||||
|
||||
d->p = p;
|
||||
d->t = t;
|
||||
}
|
||||
|
||||
void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex)
|
||||
{
|
||||
switch(m_vt->m_primclass)
|
||||
D3DPRIMITIVETYPE topology;
|
||||
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
|
||||
m_topology = D3DPT_POINTLIST;
|
||||
topology = D3DPT_POINTLIST;
|
||||
|
||||
break;
|
||||
|
||||
case GS_LINE_CLASS:
|
||||
|
||||
m_topology = D3DPT_LINELIST;
|
||||
topology = D3DPT_LINELIST;
|
||||
|
||||
if(PRIM->IIP == 0)
|
||||
{
|
||||
|
@ -122,7 +86,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
case GS_TRIANGLE_CLASS:
|
||||
|
||||
m_topology = D3DPT_TRIANGLELIST;
|
||||
topology = D3DPT_TRIANGLELIST;
|
||||
|
||||
if(PRIM->IIP == 0)
|
||||
{
|
||||
|
@ -138,7 +102,7 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
|
||||
case GS_SPRITE_CLASS:
|
||||
|
||||
m_topology = D3DPT_TRIANGLELIST;
|
||||
topology = D3DPT_TRIANGLELIST;
|
||||
|
||||
// each sprite converted to quad needs twice the space
|
||||
|
||||
|
@ -154,29 +118,35 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
size_t count = m_vertex.next;
|
||||
|
||||
int i = (int)count * 2 - 4;
|
||||
GSVertexHW9* s = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * count] - 2;
|
||||
GSVertexHW9* q = (GSVertexHW9*)&m_vertex.buff[sizeof(GSVertexHW9) * (count * 2)] - 4;
|
||||
uint32* RESTRICT index = &m_index.buff[count * 3] - 6;
|
||||
GSVertex* s = &m_vertex.buff[count - 2];
|
||||
GSVertex* q = &m_vertex.buff[count * 2 - 4];
|
||||
uint32* RESTRICT index = &m_index.buff[count * 3 - 6];
|
||||
|
||||
for(; i >= 0; i -= 4, s -= 2, q -= 4, index -= 6)
|
||||
{
|
||||
GSVertexHW9 v0 = s[0];
|
||||
GSVertexHW9 v1 = s[1];
|
||||
GSVertex v0 = s[0];
|
||||
GSVertex v1 = s[1];
|
||||
|
||||
v0.p = v0.p.xyzw(v1.p); // z, q
|
||||
v0.t = v0.t.xyzw(v1.t); // c, f
|
||||
v0.RGBAQ = v1.RGBAQ;
|
||||
v0.XYZ.Z = v1.XYZ.Z;
|
||||
v0.FOG = v1.FOG;
|
||||
|
||||
q[0] = v0;
|
||||
q[3] = v1;
|
||||
|
||||
// swap x, s
|
||||
// swap x, s, u
|
||||
|
||||
GSVector4 p = v0.p.insert<0, 0>(v1.p);
|
||||
GSVector4 t = v0.t.insert<0, 0>(v1.t);
|
||||
v1.p = v1.p.insert<0, 0>(v0.p);
|
||||
v1.t = v1.t.insert<0, 0>(v0.t);
|
||||
v0.p = p;
|
||||
v0.t = t;
|
||||
uint16 x = v0.XYZ.X;
|
||||
v0.XYZ.X = v1.XYZ.X;
|
||||
v1.XYZ.X = x;
|
||||
|
||||
float s = v0.ST.S;
|
||||
v0.ST.S = v1.ST.S;
|
||||
v1.ST.S = s;
|
||||
|
||||
uint16 u = v0.U;
|
||||
v0.U = v1.U;
|
||||
v1.U = u;
|
||||
|
||||
q[1] = v0;
|
||||
q[2] = v1;
|
||||
|
@ -199,7 +169,56 @@ void GSRendererDX9::DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Sour
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
(*(GSDevice9*)m_dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
|
||||
GSDevice9* dev = (GSDevice9*)m_dev;
|
||||
|
||||
(*dev)->SetRenderState(D3DRS_SHADEMODE, PRIM->IIP ? D3DSHADE_GOURAUD : D3DSHADE_FLAT); // TODO
|
||||
|
||||
void* ptr = NULL;
|
||||
|
||||
if(dev->IAMapVertexBuffer(&ptr, sizeof(GSVertexHW9), m_vertex.next))
|
||||
{
|
||||
GSVertex* RESTRICT s = (GSVertex*)m_vertex.buff;
|
||||
GSVertexHW9* RESTRICT d = (GSVertexHW9*)ptr;
|
||||
|
||||
for(int i = 0; i < m_vertex.next; i++, s++, d++)
|
||||
{
|
||||
GSVector4 p = GSVector4(GSVector4i::load(s->XYZ.u32[0]).upl16());
|
||||
|
||||
if(PRIM->TME && !PRIM->FST)
|
||||
{
|
||||
p = p.xyxy(GSVector4((float)s->XYZ.Z, s->RGBAQ.Q));
|
||||
}
|
||||
else
|
||||
{
|
||||
p = p.xyxy(GSVector4::load((float)s->XYZ.Z));
|
||||
}
|
||||
|
||||
GSVector4 t = GSVector4::zero();
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
if(PRIM->FST)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16());
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4::loadl(&s->ST);
|
||||
}
|
||||
}
|
||||
|
||||
t = t.xyxy(GSVector4::cast(GSVector4i(s->RGBAQ.u32[0], s->FOG)));
|
||||
|
||||
d->p = p;
|
||||
d->t = t;
|
||||
}
|
||||
|
||||
dev->IAUnmapVertexBuffer();
|
||||
}
|
||||
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
|
||||
__super::DrawPrims(rt, ds, tex);
|
||||
}
|
||||
|
@ -220,7 +239,7 @@ void GSRendererDX9::UpdateFBA(GSTexture* rt)
|
|||
GSVector4 s = GSVector4(rt->GetScale().x / rt->GetWidth(), rt->GetScale().y / rt->GetHeight());
|
||||
GSVector4 o = GSVector4(-1.0f, 1.0f);
|
||||
|
||||
GSVector4 src = ((m_vt->m_min.p.xyxy(m_vt->m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 src = ((m_vt.m_min.p.xyxy(m_vt.m_max.p) + o.xxyy()) * s.xyxy()).sat(o.zzyy());
|
||||
GSVector4 dst = src * 2.0f + o.xxxx();
|
||||
|
||||
GSVertexPT1 vertices[] =
|
||||
|
|
|
@ -34,17 +34,9 @@ protected:
|
|||
Direct3DBlendState9 bs;
|
||||
} m_fba;
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex);
|
||||
void UpdateFBA(GSTexture* rt);
|
||||
|
||||
int GetPosX(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.x;}
|
||||
int GetPosY(const void* vertex) const {return (int)((const GSVertexHW9*)vertex)->p.y;}
|
||||
uint32 GetColor(const void* vertex) const {return ((const GSVertexHW9*)vertex)->t.u32[2];}
|
||||
void SetColor(void* vertex, uint32 c) const {((GSVertexHW9*)vertex)->t.u32[2] = c;}
|
||||
|
||||
public:
|
||||
GSRendererDX9();
|
||||
virtual ~GSRendererDX9() {}
|
||||
|
|
|
@ -22,9 +22,8 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRendererHW.h"
|
||||
|
||||
GSRendererHW::GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc)
|
||||
: GSRenderer(vt, vertex_stride)
|
||||
, m_tc(tc)
|
||||
GSRendererHW::GSRendererHW(GSTextureCache* tc)
|
||||
: m_tc(tc)
|
||||
, m_width(1024)
|
||||
, m_height(1024)
|
||||
, m_skip(0)
|
||||
|
@ -211,7 +210,7 @@ void GSRendererHW::Draw()
|
|||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt->IsLinear());
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
|
@ -298,7 +297,7 @@ void GSRendererHW::Draw()
|
|||
|
||||
//
|
||||
|
||||
GSVector4i r = GSVector4i(m_vt->m_min.p.xyxy(m_vt->m_max.p)).rintersect(GSVector4i(context->scissor.in));
|
||||
GSVector4i r = GSVector4i(m_vt.m_min.p.xyxy(m_vt.m_max.p)).rintersect(GSVector4i(context->scissor.in));
|
||||
|
||||
if(fm != 0xffffffff)
|
||||
{
|
||||
|
@ -410,14 +409,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
|
||||
if(lines == 0)
|
||||
{
|
||||
if(m_vt->m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
|
||||
if(m_vt.m_primclass == GS_LINE_CLASS && (m_vertex.next == 448 * 2 || m_vertex.next == 512 * 2))
|
||||
{
|
||||
lines = m_vertex.next / 2;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_vt->m_primclass == GS_POINT_CLASS)
|
||||
if(m_vt.m_primclass == GS_POINT_CLASS)
|
||||
{
|
||||
if(m_vertex.next >= 16 * 512)
|
||||
{
|
||||
|
@ -428,14 +427,14 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
int ox = m_context->XYOFFSET.OFX;
|
||||
int oy = m_context->XYOFFSET.OFY;
|
||||
|
||||
const uint8* RESTRICT v = m_vertex.buff;
|
||||
const GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v++)
|
||||
{
|
||||
int x = (GetPosX(v) - ox) >> 4;
|
||||
int y = (GetPosY(v) - oy) >> 4;
|
||||
int x = (v->XYZ.X - ox) >> 4;
|
||||
int y = (v->XYZ.Y - oy) >> 4;
|
||||
|
||||
video[(y << 8) + (y << 7) + (y << 6) + x] = GetColor(v);
|
||||
video[(y << 8) + (y << 7) + (y << 6) + x] = v->RGBAQ.u32[0];
|
||||
}
|
||||
|
||||
return false;
|
||||
|
@ -445,7 +444,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
lines = 0;
|
||||
}
|
||||
}
|
||||
else if(m_vt->m_primclass == GS_LINE_CLASS)
|
||||
else if(m_vt.m_primclass == GS_LINE_CLASS)
|
||||
{
|
||||
if(m_vertex.next == lines * 2)
|
||||
{
|
||||
|
@ -458,10 +457,8 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
|
||||
t->m_texture->Update(GSVector4i(0, 0, 448, lines), video, 448 * 4);
|
||||
|
||||
size_t stride = m_vertex.stride;
|
||||
|
||||
memcpy(&m_vertex.buff[stride * 2], &m_vertex.buff[stride * (m_vertex.next - 2)], stride);
|
||||
memcpy(&m_vertex.buff[stride * 3], &m_vertex.buff[stride * (m_vertex.next - 1)], stride);
|
||||
m_vertex.buff[2] = m_vertex.buff[m_vertex.next - 2];
|
||||
m_vertex.buff[3] = m_vertex.buff[m_vertex.next - 1];
|
||||
|
||||
m_index.buff[0] = 0;
|
||||
m_index.buff[1] = 1;
|
||||
|
@ -473,7 +470,7 @@ bool GSRendererHW::OI_FFXII(GSTexture* rt, GSTexture* ds, GSTextureCache::Source
|
|||
m_vertex.head = m_vertex.tail = m_vertex.next = 4;
|
||||
m_index.tail = 6;
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
|
||||
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GS_TRIANGLE_CLASS);
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -505,11 +502,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
|
|||
{
|
||||
// missing red channel fix (looks alright in pcsx2 r5000+)
|
||||
|
||||
uint8* RESTRICT v = m_vertex.buff;
|
||||
GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v += m_vertex.stride)
|
||||
for(int i = (int)m_vertex.next; i >= 0; i--, v++)
|
||||
{
|
||||
uint32 c = GetColor(v);
|
||||
uint32 c = v->RGBAQ.u32[0];
|
||||
|
||||
uint32 r = (c >> 0) & 0xff;
|
||||
uint32 g = (c >> 8) & 0xff;
|
||||
|
@ -517,11 +514,11 @@ bool GSRendererHW::OI_MetalSlug6(GSTexture* rt, GSTexture* ds, GSTextureCache::S
|
|||
|
||||
if(r == 0 && g != 0 && b != 0)
|
||||
{
|
||||
SetColor(v, (c & 0xffffff00) | ((g + b + 1) >> 1));
|
||||
v->RGBAQ.u32[0] = (c & 0xffffff00) | ((g + b + 1) >> 1);
|
||||
}
|
||||
}
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt->m_primclass);
|
||||
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, m_vt.m_primclass);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -701,7 +698,7 @@ bool GSRendererHW::OI_StarWarsForceUnleashed(GSTexture* rt, GSTexture* ds, GSTex
|
|||
}
|
||||
else if(PRIM->TME)
|
||||
{
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_max.p.z == 0))
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_max.p.z == 0))
|
||||
{
|
||||
m_dev->ClearDepth(ds, 0);
|
||||
}
|
||||
|
@ -757,7 +754,7 @@ bool GSRendererHW::OI_SpyroNewBeginning(GSTexture* rt, GSTexture* ds, GSTextureC
|
|||
}
|
||||
else if(PRIM->TME)
|
||||
{
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0))
|
||||
{
|
||||
m_dev->ClearDepth(ds, 0);
|
||||
}
|
||||
|
@ -783,7 +780,7 @@ bool GSRendererHW::OI_SpyroEternalNight(GSTexture* rt, GSTexture* ds, GSTextureC
|
|||
}
|
||||
else if(PRIM->TME)
|
||||
{
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt->m_max.p.z == m_vt->m_min.p.z && m_vt->m_min.p.z == 0x0))
|
||||
if((FBP == 0x0 || FBP == 0x01180) && FPSM == PSM_PSMCT32 && (m_vt.m_eq.z && m_vt.m_min.p.z == 0))
|
||||
{
|
||||
m_dev->ClearDepth(ds, 0);
|
||||
}
|
||||
|
@ -797,7 +794,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac
|
|||
uint32 FBP = m_context->FRAME.Block();
|
||||
uint32 FPSM = m_context->FRAME.PSM;
|
||||
|
||||
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt->m_max.p.z == m_vt->m_min.p.z)
|
||||
if (FPSM == PSM_PSMCT32 && FBP == 0x01c00 && !m_context->TEST.ATE && m_vt.m_eq.z)
|
||||
{
|
||||
m_context->TEST.ZTST = ZTST_ALWAYS;
|
||||
//m_dev->ClearDepth(ds, 0);
|
||||
|
@ -809,7 +806,7 @@ bool GSRendererHW::OI_TalesOfLegendia(GSTexture* rt, GSTexture* ds, GSTextureCac
|
|||
|
||||
bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
|
||||
{
|
||||
if(m_vt->m_primclass == GS_POINT_CLASS && !PRIM->TME)
|
||||
if(m_vt.m_primclass == GS_POINT_CLASS && !PRIM->TME)
|
||||
{
|
||||
uint32 FBP = m_context->FRAME.Block();
|
||||
uint32 FBW = m_context->FRAME.FBW;
|
||||
|
@ -818,16 +815,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
|||
{
|
||||
if(m_vertex.next == 16)
|
||||
{
|
||||
uint8* RESTRICT v = m_vertex.buff;
|
||||
GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = 0; i < 16; i++, v += m_vertex.stride)
|
||||
for(int i = 0; i < 16; i++, v++)
|
||||
{
|
||||
uint32 c = GetColor(v);
|
||||
uint32 c = v->RGBAQ.u32[0];
|
||||
uint32 a = c >> 24;
|
||||
|
||||
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
||||
|
||||
SetColor(v, c);
|
||||
v->RGBAQ.u32[0] = c;
|
||||
|
||||
m_mem.WritePixel32(i & 7, i >> 3, c, FBP, FBW);
|
||||
}
|
||||
|
@ -838,16 +835,16 @@ bool GSRendererHW::OI_PointListPalette(GSTexture* rt, GSTexture* ds, GSTextureCa
|
|||
}
|
||||
else if(m_vertex.next == 256)
|
||||
{
|
||||
uint8* RESTRICT v = m_vertex.buff;
|
||||
GSVertex* RESTRICT v = m_vertex.buff;
|
||||
|
||||
for(int i = 0; i < 256; i++, v += m_vertex.stride)
|
||||
for(int i = 0; i < 256; i++, v++)
|
||||
{
|
||||
uint32 c = GetColor(v);
|
||||
uint32 c = v->RGBAQ.u32[0];
|
||||
uint32 a = c >> 24;
|
||||
|
||||
c = (a >= 0x80 ? 0xff000000 : (a << 25)) | (c & 0x00ffffff);
|
||||
|
||||
SetColor(v, c);
|
||||
v->RGBAQ.u32[0] = c;
|
||||
|
||||
m_mem.WritePixel32(i & 15, i >> 4, c, FBP, FBW);
|
||||
}
|
||||
|
|
|
@ -126,11 +126,6 @@ private:
|
|||
|
||||
} m_hacks;
|
||||
|
||||
virtual int GetPosX(const void* vertex) const = 0;
|
||||
virtual int GetPosY(const void* vertex) const = 0;
|
||||
virtual uint32 GetColor(const void* vertex) const = 0;
|
||||
virtual void SetColor(void* vertex, uint32 c) const = 0;
|
||||
|
||||
#pragma endregion
|
||||
|
||||
protected:
|
||||
|
@ -139,7 +134,7 @@ protected:
|
|||
virtual void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) = 0;
|
||||
|
||||
public:
|
||||
GSRendererHW(GSVertexTrace* vt, size_t vertex_stride, GSTextureCache* tc);
|
||||
GSRendererHW(GSTextureCache* tc);
|
||||
virtual ~GSRendererHW();
|
||||
|
||||
void SetGameCRC(uint32 crc, int options);
|
||||
|
|
|
@ -32,11 +32,6 @@ class GSRendererNull : public GSRenderer
|
|||
};
|
||||
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
}
|
||||
|
||||
void Draw()
|
||||
{
|
||||
}
|
||||
|
@ -48,8 +43,7 @@ protected:
|
|||
|
||||
public:
|
||||
GSRendererNull()
|
||||
: GSRenderer(new GSVertexTraceNull(this), sizeof(GSVertex))
|
||||
: GSRenderer()
|
||||
{
|
||||
InitConvertVertex(GSRendererNull);
|
||||
}
|
||||
};
|
||||
|
|
|
@ -29,11 +29,8 @@ static FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
|||
const GSVector4 g_pos_scale(1.0f / 16, 1.0f / 16, 1.0f, 128.0f);
|
||||
|
||||
GSRendererSW::GSRendererSW(int threads)
|
||||
: GSRenderer(new GSVertexTraceSW(this), sizeof(GSVertexSW))
|
||||
, m_fzb(NULL)
|
||||
: m_fzb(NULL)
|
||||
{
|
||||
InitConvertVertex(GSRendererSW);
|
||||
|
||||
m_nativeres = true; // ignore ini, sw is always native
|
||||
|
||||
m_tc = new GSTextureCacheSW(this);
|
||||
|
@ -233,73 +230,67 @@ GSTexture* GSRendererSW::GetOutput(int i)
|
|||
return m_texture[i];
|
||||
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
GSVertex* s = (GSVertex*)((GSVertexSW*)m_vertex.buff + src_index);
|
||||
GSVertexSW* d = (GSVertexSW*)m_vertex.buff + dst_index;
|
||||
|
||||
ASSERT(d->_pad.u32[0] != 0x12345678);
|
||||
|
||||
uint32 z = s->XYZ.Z;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - (GSVector4i)m_context->XYOFFSET;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
GSVector4 p, t, c;
|
||||
|
||||
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(fst)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4(s->ST.S, s->ST.T) * GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
|
||||
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
|
||||
}
|
||||
}
|
||||
|
||||
c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
|
||||
|
||||
d->p = p;
|
||||
d->c = c;
|
||||
d->t = t;
|
||||
|
||||
#ifdef _DEBUG
|
||||
d->_pad.u32[0] = 0x12345678; // means trouble if this has already been set, should only convert each vertex once
|
||||
#endif
|
||||
|
||||
if(prim == GS_SPRITE)
|
||||
{
|
||||
d->t.u32[3] = z;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererSW::Draw()
|
||||
{
|
||||
SharedData* sd = new SharedData(this);
|
||||
|
||||
shared_ptr<GSRasterizerData> data(sd);
|
||||
|
||||
sd->primclass = m_vt->m_primclass;
|
||||
sd->primclass = m_vt.m_primclass;
|
||||
sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
|
||||
sd->vertex = (GSVertexSW*)sd->buff;
|
||||
sd->vertex_count = m_vertex.next;
|
||||
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
|
||||
sd->index_count = m_index.tail;
|
||||
|
||||
memcpy(sd->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
|
||||
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
for(size_t i = 0; i < m_index.tail; i++)
|
||||
{
|
||||
ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678);
|
||||
// TODO: template, JIT
|
||||
|
||||
GSVertex* RESTRICT s = m_vertex.buff;
|
||||
GSVertexSW* RESTRICT d = sd->vertex;
|
||||
|
||||
GSVector4i o = (GSVector4i)m_context->XYOFFSET;
|
||||
GSVector4 tsize = GSVector4(0x10000 << m_context->TEX0.TW, 0x10000 << m_context->TEX0.TH);
|
||||
|
||||
for(size_t i = 0; i < m_vertex.next; i++, s++, d++)
|
||||
{
|
||||
uint32 z = s->XYZ.Z;
|
||||
|
||||
GSVector4i xy = GSVector4i::load((int)s->XYZ.u32[0]).upl16() - o;
|
||||
GSVector4i zf = GSVector4i((int)std::min<uint32>(z, 0xffffff00), s->FOG); // NOTE: larger values of z may roll over to 0 when converting back to uint32 later
|
||||
|
||||
GSVector4 p, t, c;
|
||||
|
||||
p = GSVector4(xy).xyxy(GSVector4(zf) + (GSVector4::m_x4f800000 & GSVector4::cast(zf.sra32(31)))) * g_pos_scale;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
if(PRIM->FST)
|
||||
{
|
||||
t = GSVector4(GSVector4i::load(s->UV).upl16() << (16 - 4));
|
||||
}
|
||||
else
|
||||
{
|
||||
t = GSVector4(s->ST.S, s->ST.T) * tsize;
|
||||
t = t.xyxy(GSVector4::load(s->RGBAQ.Q));
|
||||
}
|
||||
}
|
||||
|
||||
c = GSVector4::rgba32(s->RGBAQ.u32[0], 7);
|
||||
|
||||
d->p = p;
|
||||
d->c = c;
|
||||
d->t = t;
|
||||
|
||||
if(sd->primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
d->t.u32[3] = z;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
|
||||
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
|
||||
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
|
||||
|
@ -314,7 +305,7 @@ void GSRendererSW::Draw()
|
|||
GSScanlineGlobalData& gd = sd->global;
|
||||
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in);
|
||||
GSVector4i bbox = GSVector4i(m_vt->m_min.p.floor().xyxy(m_vt->m_max.p.ceil()));
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
|
||||
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
|
||||
|
||||
|
@ -713,7 +704,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
const GS_PRIM_CLASS primclass = m_vt->m_primclass;
|
||||
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
|
||||
|
||||
gd.vm = m_mem.m_vm8;
|
||||
|
||||
|
@ -790,7 +781,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
{
|
||||
gd.sel.fpsm = GSLocalMemory::m_psm[context->FRAME.PSM].fmt;
|
||||
|
||||
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt->m_eq.rgba != 0xffff)
|
||||
if((primclass == GS_LINE_CLASS || primclass == GS_TRIANGLE_CLASS) && m_vt.m_eq.rgba != 0xffff)
|
||||
{
|
||||
gd.sel.iip = PRIM->IIP;
|
||||
}
|
||||
|
@ -800,7 +791,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
gd.sel.tfx = context->TEX0.TFX;
|
||||
gd.sel.tcc = context->TEX0.TCC;
|
||||
gd.sel.fst = PRIM->FST;
|
||||
gd.sel.ltf = m_vt->IsLinear();
|
||||
gd.sel.ltf = m_vt.IsLinear();
|
||||
|
||||
if(GSLocalMemory::m_psm[context->TEX0.PSM].pal > 0)
|
||||
{
|
||||
|
@ -814,7 +805,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
gd.sel.wms = context->CLAMP.WMS;
|
||||
gd.sel.wmt = context->CLAMP.WMT;
|
||||
|
||||
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt->m_eq.rgba == 0xffff && m_vt->m_min.c.eq(GSVector4i(128)))
|
||||
if(gd.sel.tfx == TFX_MODULATE && gd.sel.tcc && m_vt.m_eq.rgba == 0xffff && m_vt.m_min.c.eq(GSVector4i(128)))
|
||||
{
|
||||
// modulate does not do anything when vertex color is 0x80
|
||||
|
||||
|
@ -833,7 +824,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
if(!t->Update(r)) {ASSERT(0); return false;}
|
||||
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt->m_lod.x > 0)
|
||||
if(s_dump)// && m_context->TEX1.MXL > 0 && m_context->TEX1.MMIN >= 2 && m_context->TEX1.MMIN <= 5 && m_vt.m_lod.x > 0)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
|
||||
|
@ -850,7 +841,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
gd.tex[0] = t->m_buff;
|
||||
gd.sel.tw = t->m_tw - 3;
|
||||
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt->m_lod.y > 0)
|
||||
if(m_mipmap && context->TEX1.MXL > 0 && context->TEX1.MMIN >= 2 && context->TEX1.MMIN <= 5 && m_vt.m_lod.y > 0)
|
||||
{
|
||||
// TEX1.MMIN
|
||||
// 000 p
|
||||
|
@ -860,13 +851,13 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
// 100 l round
|
||||
// 101 l tri
|
||||
|
||||
if(m_vt->m_lod.x > 0)
|
||||
if(m_vt.m_lod.x > 0)
|
||||
{
|
||||
gd.sel.ltf = context->TEX1.MMIN >> 2;
|
||||
}
|
||||
else
|
||||
{
|
||||
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt->m_lod.x <= 0 && m_vt->m_lod.y > 0
|
||||
// TODO: isbilinear(mmag) != isbilinear(mmin) && m_vt.m_lod.x <= 0 && m_vt.m_lod.y > 0
|
||||
}
|
||||
|
||||
gd.sel.mmin = (context->TEX1.MMIN & 1) + 1; // 1: round, 2: tri
|
||||
|
@ -875,9 +866,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
int mxl = (std::min<int>((int)context->TEX1.MXL, 6) << 16);
|
||||
int k = context->TEX1.K << 12;
|
||||
|
||||
if((int)m_vt->m_lod.x >= (int)context->TEX1.MXL)
|
||||
if((int)m_vt.m_lod.x >= (int)context->TEX1.MXL)
|
||||
{
|
||||
k = (int)m_vt->m_lod.x << 16; // set lod to max level
|
||||
k = (int)m_vt.m_lod.x << 16; // set lod to max level
|
||||
|
||||
gd.sel.lcm = 1; // lod is constant
|
||||
gd.sel.mmin = 1; // tri-linear is meaningless
|
||||
|
@ -891,7 +882,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
if(gd.sel.fst)
|
||||
{
|
||||
ASSERT(gd.sel.lcm == 1);
|
||||
ASSERT(((m_vt->m_min.t.uph(m_vt->m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
|
||||
ASSERT(((m_vt.m_min.t.uph(m_vt.m_max.t) == GSVector4::zero()).mask() & 3) == 3); // ratchet and clank (menu)
|
||||
|
||||
gd.sel.lcm = 1;
|
||||
}
|
||||
|
@ -920,8 +911,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
GIFRegTEX0 MIP_TEX0 = context->TEX0;
|
||||
GIFRegCLAMP MIP_CLAMP = context->CLAMP;
|
||||
|
||||
GSVector4 tmin = m_vt->m_min.t;
|
||||
GSVector4 tmax = m_vt->m_max.t;
|
||||
GSVector4 tmin = m_vt.m_min.t;
|
||||
GSVector4 tmax = m_vt.m_max.t;
|
||||
|
||||
static int s_counter = 0;
|
||||
|
||||
|
@ -971,8 +962,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
MIP_CLAMP.MAXU >>= 1;
|
||||
MIP_CLAMP.MAXV >>= 1;
|
||||
|
||||
m_vt->m_min.t *= 0.5f;
|
||||
m_vt->m_max.t *= 0.5f;
|
||||
m_vt.m_min.t *= 0.5f;
|
||||
m_vt.m_max.t *= 0.5f;
|
||||
|
||||
GSTextureCacheSW::Texture* t = m_tc->Lookup(MIP_TEX0, env.TEXA, gd.sel.tw + 3);
|
||||
|
||||
|
@ -1014,8 +1005,8 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
s_counter++;
|
||||
|
||||
m_vt->m_min.t = tmin;
|
||||
m_vt->m_max.t = tmax;
|
||||
m_vt.m_min.t = tmin;
|
||||
m_vt.m_max.t = tmax;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1025,7 +1016,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
GSVertexSW* RESTRICT v = data->vertex;
|
||||
|
||||
if(m_vt->m_eq.q)
|
||||
if(m_vt.m_eq.q)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
|
@ -1202,7 +1193,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
{
|
||||
gd.sel.zpsm = GSLocalMemory::m_psm[context->ZBUF.PSM].fmt;
|
||||
gd.sel.ztst = ztest ? context->TEST.ZTST : ZTST_ALWAYS;
|
||||
gd.sel.zoverflow = GSVector4i(m_vt->m_max.p).z == 0x80000000;
|
||||
gd.sel.zoverflow = GSVector4i(m_vt.m_max.p).z == 0x80000000;
|
||||
}
|
||||
|
||||
gd.fm = GSVector4i(fm);
|
||||
|
|
|
@ -71,9 +71,6 @@ protected:
|
|||
|
||||
bool GetScanlineGlobalData(SharedData* data);
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
public:
|
||||
GSRendererSW(int threads);
|
||||
virtual ~GSRendererSW();
|
||||
|
|
|
@ -26,7 +26,7 @@
|
|||
//#define Offset_ST // Fixes Persona3 mini map alignment which is off even in software rendering
|
||||
//#define Offset_UV // Fixes / breaks various titles
|
||||
|
||||
GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
||||
GSState::GSState()
|
||||
: m_version(6)
|
||||
, m_mt(false)
|
||||
, m_irq(NULL)
|
||||
|
@ -35,7 +35,7 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
, m_crc(0)
|
||||
, m_options(0)
|
||||
, m_frameskip(0)
|
||||
, m_vt(vt)
|
||||
, m_vt(this)
|
||||
{
|
||||
m_nativeres = !!theApp.GetConfig("nativeres", 0);
|
||||
|
||||
|
@ -44,15 +44,8 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
memset(&m_vertex, 0, sizeof(m_vertex));
|
||||
memset(&m_index, 0, sizeof(m_index));
|
||||
|
||||
ASSERT(vertex_stride >= sizeof(GSVertex));
|
||||
|
||||
m_vertex.stride = vertex_stride;
|
||||
m_vertex.tmp = (uint8*)_aligned_malloc(m_vertex.stride * 2, 32);
|
||||
|
||||
GrowVertexBuffer();
|
||||
|
||||
memset(m_cv, 0, sizeof(m_cv));
|
||||
|
||||
m_sssize = 0;
|
||||
|
||||
m_sssize += sizeof(m_version);
|
||||
|
@ -114,8 +107,6 @@ GSState::GSState(GSVertexTrace* vt, size_t vertex_stride)
|
|||
|
||||
GSState::~GSState()
|
||||
{
|
||||
_aligned_free(m_vertex.tmp);
|
||||
|
||||
if(m_vertex.buff) _aligned_free(m_vertex.buff);
|
||||
if(m_index.buff) _aligned_free(m_index.buff);
|
||||
}
|
||||
|
@ -211,7 +202,7 @@ void GSState::Reset()
|
|||
{
|
||||
printf("GS reset\n");
|
||||
|
||||
memset(m_mem.m_vm8, 0, m_mem.m_vmsize);
|
||||
// FIXME: memset(m_mem.m_vm8, 0, m_mem.m_vmsize); // bios logo not shown cut in half after reset, missing graphics in GoW after first FMV
|
||||
memset(&m_path[0], 0, sizeof(m_path[0]) * countof(m_path));
|
||||
memset(&m_v, 0, sizeof(m_v));
|
||||
|
||||
|
@ -1274,40 +1265,8 @@ void GSState::FlushPrim()
|
|||
{
|
||||
if(m_index.tail > 0)
|
||||
{
|
||||
if(0)
|
||||
{
|
||||
uint8* buff = new uint8[m_vertex.next];
|
||||
GSVertex buff[2];
|
||||
|
||||
memset(buff, 0, m_vertex.next);
|
||||
|
||||
for(size_t i = 0; i < m_index.tail; i++)
|
||||
{
|
||||
ASSERT(m_index.buff[i] < m_vertex.next);
|
||||
|
||||
buff[m_index.buff[i]] = 1;
|
||||
}
|
||||
|
||||
size_t count = 0;
|
||||
|
||||
for(size_t i = 0; i < m_vertex.next; i++)
|
||||
{
|
||||
if(buff[i] == 0)
|
||||
{
|
||||
count++;
|
||||
}
|
||||
}
|
||||
|
||||
if(count > 0)
|
||||
{
|
||||
printf("unref %lld %d/%d\n", m_perfmon.GetFrame(), count, m_vertex.next);
|
||||
}
|
||||
|
||||
delete [] buff;
|
||||
}
|
||||
|
||||
uint8* buff = m_vertex.tmp;
|
||||
|
||||
size_t stride = m_vertex.stride;
|
||||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
size_t next = m_vertex.next;
|
||||
|
@ -1326,11 +1285,11 @@ void GSState::FlushPrim()
|
|||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
unused = tail - head;
|
||||
memcpy(buff, &m_vertex.buff[stride * head], stride * unused);
|
||||
memcpy(buff, &m_vertex.buff[head], sizeof(GSVertex) * unused);
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
memcpy(buff, &m_vertex.buff[stride * head], stride); unused = 1;
|
||||
if(tail - 1 > head) {memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride); unused = 2;}
|
||||
buff[0] = m_vertex.buff[head]; unused = 1;
|
||||
if(tail - 1 > head) {buff[1] = m_vertex.buff[tail - 1]; unused = 2;}
|
||||
break;
|
||||
case GS_INVALID:
|
||||
break;
|
||||
|
@ -1345,7 +1304,7 @@ void GSState::FlushPrim()
|
|||
{
|
||||
// FIXME: berserk fpsm = 27 (8H)
|
||||
|
||||
m_vt->Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
m_vt.Update(m_vertex.buff, m_index.buff, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
|
||||
|
||||
Draw();
|
||||
|
||||
|
@ -1359,7 +1318,7 @@ void GSState::FlushPrim()
|
|||
|
||||
if(unused > 0)
|
||||
{
|
||||
memcpy(m_vertex.buff, buff, stride * unused);
|
||||
memcpy(m_vertex.buff, buff, sizeof(GSVertex) * unused);
|
||||
|
||||
m_vertex.tail = unused;
|
||||
m_vertex.next = next > head ? next - head : 0;
|
||||
|
@ -2182,20 +2141,18 @@ void GSState::UpdateVertexKick()
|
|||
m_fpGIFRegHandlers[GIF_A_D_REG_XYZ3] = m_fpGIFRegHandlerXYZ[prim][3];
|
||||
|
||||
m_fpGIFPackedRegHandlersC[GIF_REG_STQRGBAXYZF2] = m_fpGIFPackedRegHandlerSTQRGBAXYZF2[prim];
|
||||
|
||||
m_cvf = m_cv[prim][PRIM->TME][PRIM->FST];
|
||||
}
|
||||
|
||||
void GSState::GrowVertexBuffer()
|
||||
{
|
||||
int maxcount = std::max<int>(m_vertex.maxcount * 3 / 2, 10000);
|
||||
|
||||
uint8* vertex = (uint8*)_aligned_malloc(m_vertex.stride * maxcount, 16);
|
||||
GSVertex* vertex = (GSVertex*)_aligned_malloc(sizeof(GSVertex) * maxcount, 16);
|
||||
uint32* index = (uint32*)_aligned_malloc(sizeof(uint32) * maxcount * 3, 16); // worst case is slightly less than vertex number * 3
|
||||
|
||||
if(m_vertex.buff != NULL)
|
||||
{
|
||||
memcpy(vertex, m_vertex.buff, m_vertex.stride * m_vertex.tail);
|
||||
memcpy(vertex, m_vertex.buff, sizeof(GSVertex) * m_vertex.tail);
|
||||
|
||||
_aligned_free(m_vertex.buff);
|
||||
}
|
||||
|
@ -2227,17 +2184,13 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
GSVector4i v0(m_v.m[0]);
|
||||
GSVector4i v1(m_v.m[1]);
|
||||
|
||||
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[m_vertex.stride * tail];
|
||||
GSVector4i* RESTRICT tailptr = (GSVector4i*)&m_vertex.buff[tail];
|
||||
|
||||
tailptr[0] = v0;
|
||||
tailptr[1] = v1;
|
||||
|
||||
m_vertex.xy[xy_tail & 3] = GSVector4(v1.upl32(v1.sub16(GSVector4i::load(m_ofxy)).sra16(4)).upl16()); // zw not sign extended, only useful for eq tests
|
||||
|
||||
#ifdef _DEBUG
|
||||
memset(&tailptr[2], 0, m_vertex.stride - sizeof(GSVertex));
|
||||
#endif
|
||||
|
||||
m_vertex.tail = ++tail;
|
||||
m_vertex.xy_tail = ++xy_tail;
|
||||
|
||||
|
@ -2353,8 +2306,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
|
||||
uint32* RESTRICT buff = &m_index.buff[m_index.tail];
|
||||
|
||||
size_t src_index = head;
|
||||
|
||||
switch(prim)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
|
@ -2362,7 +2313,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 1;
|
||||
m_index.tail += 1;
|
||||
(this->*m_cvf)(head, head);
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2370,18 +2320,20 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 2;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
(this->*m_cvf)(head + 0, head + 0);
|
||||
(this->*m_cvf)(head + 1, head + 1);
|
||||
break;
|
||||
case GS_LINESTRIP:
|
||||
if(next < head) {head = next; m_vertex.tail = next + 2;}
|
||||
if(next < head)
|
||||
{
|
||||
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
|
||||
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
|
||||
head = next;
|
||||
m_vertex.tail = next + 2;
|
||||
}
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
if(head + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
|
||||
/*if(head + 1 >= next)*/ (this->*m_cvf)(head + 1, src_index + 1); // this is always a new vertex
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2390,21 +2342,22 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 3;
|
||||
m_vertex.next = head + 3;
|
||||
m_index.tail += 3;
|
||||
(this->*m_cvf)(head + 0, head + 0);
|
||||
(this->*m_cvf)(head + 1, head + 1);
|
||||
(this->*m_cvf)(head + 2, head + 2);
|
||||
break;
|
||||
case GS_TRIANGLESTRIP:
|
||||
if(next < head) {head = next; m_vertex.tail = next + 3;}
|
||||
if(next < head)
|
||||
{
|
||||
m_vertex.buff[next + 0] = m_vertex.buff[head + 0];
|
||||
m_vertex.buff[next + 1] = m_vertex.buff[head + 1];
|
||||
m_vertex.buff[next + 2] = m_vertex.buff[head + 2];
|
||||
head = next;
|
||||
m_vertex.tail = next + 3;
|
||||
}
|
||||
buff[0] = head + 0;
|
||||
buff[1] = head + 1;
|
||||
buff[2] = head + 2;
|
||||
m_vertex.head = head + 1;
|
||||
m_vertex.next = head + 3;
|
||||
m_index.tail += 3;
|
||||
if(src_index + 0 >= next) (this->*m_cvf)(head + 0, src_index + 0);
|
||||
if(src_index + 1 >= next) (this->*m_cvf)(head + 1, src_index + 1);
|
||||
/*if(src_index + 2 >= next)*/ (this->*m_cvf)(head + 2, src_index + 2); // this is always a new vertex
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
// TODO: remove gaps, next == head && head < tail - 3 || next > head && next < tail - 2 (very rare)
|
||||
|
@ -2413,9 +2366,6 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
buff[2] = tail - 1;
|
||||
m_vertex.next = tail;
|
||||
m_index.tail += 3;
|
||||
if(head >= next) (this->*m_cvf)(head, head);
|
||||
if(tail - 2 >= next) (this->*m_cvf)(tail - 2, tail - 2);
|
||||
/*if(tail - 1 >= next)*/ (this->*m_cvf)(tail - 1, tail - 1); // this is always a new vertex
|
||||
break;
|
||||
case GS_SPRITE:
|
||||
buff[0] = head + 0;
|
||||
|
@ -2423,10 +2373,8 @@ __forceinline void GSState::VertexKick(uint32 skip)
|
|||
m_vertex.head = head + 2;
|
||||
m_vertex.next = head + 2;
|
||||
m_index.tail += 2;
|
||||
(this->*m_cvf)(head + 0, head + 0);
|
||||
(this->*m_cvf)(head + 1, head + 1);
|
||||
break;
|
||||
case GS_INVALID:
|
||||
case GS_INVALID:
|
||||
m_vertex.tail = head;
|
||||
break;
|
||||
default:
|
||||
|
@ -2492,7 +2440,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
|
|||
|
||||
if(wms + wmt < 6)
|
||||
{
|
||||
GSVector4 st = m_vt->m_min.t.xyxy(m_vt->m_max.t);
|
||||
GSVector4 st = m_vt.m_min.t.xyxy(m_vt.m_max.t);
|
||||
|
||||
if(linear)
|
||||
{
|
||||
|
@ -2570,7 +2518,7 @@ void GSState::GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFR
|
|||
|
||||
void GSState::GetAlphaMinMax()
|
||||
{
|
||||
if(m_vt->m_alpha.valid)
|
||||
if(m_vt.m_alpha.valid)
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
@ -2578,7 +2526,7 @@ void GSState::GetAlphaMinMax()
|
|||
const GSDrawingEnvironment& env = m_env;
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector4i a = m_vt->m_min.c.uph32(m_vt->m_max.c).zzww();
|
||||
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
|
||||
|
||||
if(PRIM->TME && context->TEX0.TCC)
|
||||
{
|
||||
|
@ -2630,9 +2578,9 @@ void GSState::GetAlphaMinMax()
|
|||
}
|
||||
}
|
||||
|
||||
m_vt->m_alpha.min = a.x;
|
||||
m_vt->m_alpha.max = a.z;
|
||||
m_vt->m_alpha.valid = true;
|
||||
m_vt.m_alpha.min = a.x;
|
||||
m_vt.m_alpha.max = a.z;
|
||||
m_vt.m_alpha.valid = true;
|
||||
}
|
||||
|
||||
bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
|
||||
|
@ -2649,8 +2597,8 @@ bool GSState::TryAlphaTest(uint32& fm, uint32& zm)
|
|||
{
|
||||
GetAlphaMinMax();
|
||||
|
||||
int amin = m_vt->m_alpha.min;
|
||||
int amax = m_vt->m_alpha.max;
|
||||
int amin = m_vt.m_alpha.min;
|
||||
int amax = m_vt.m_alpha.max;
|
||||
|
||||
int aref = context->TEST.AREF;
|
||||
|
||||
|
@ -2734,8 +2682,8 @@ bool GSState::IsOpaque()
|
|||
{
|
||||
GetAlphaMinMax();
|
||||
|
||||
amin = m_vt->m_alpha.min;
|
||||
amax = m_vt->m_alpha.max;
|
||||
amin = m_vt.m_alpha.min;
|
||||
amax = m_vt.m_alpha.max;
|
||||
}
|
||||
else if(context->ALPHA.C == 1)
|
||||
{
|
||||
|
|
|
@ -143,12 +143,10 @@ protected:
|
|||
|
||||
struct
|
||||
{
|
||||
uint8* buff;
|
||||
size_t stride;
|
||||
GSVertex* buff;
|
||||
size_t head, tail, next, maxcount; // head: first vertex, tail: last vertex + 1, next: last indexed + 1
|
||||
GSVector4 xy[4];
|
||||
size_t xy_tail;
|
||||
uint8* tmp;
|
||||
} m_vertex;
|
||||
|
||||
struct
|
||||
|
@ -157,26 +155,6 @@ protected:
|
|||
size_t tail;
|
||||
} m_index;
|
||||
|
||||
typedef void (GSState::*ConvertVertexPtr)(size_t dst_index, size_t src_index);
|
||||
|
||||
ConvertVertexPtr m_cv[8][2][2], m_cvf; // [PRIM][TME][FST]
|
||||
|
||||
#define InitConvertVertex2(T, P) \
|
||||
m_cv[P][0][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 0>; \
|
||||
m_cv[P][0][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 0, 1>; \
|
||||
m_cv[P][1][0] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 0>; \
|
||||
m_cv[P][1][1] = (ConvertVertexPtr)&T::ConvertVertex<P, 1, 1>; \
|
||||
|
||||
#define InitConvertVertex(T) \
|
||||
InitConvertVertex2(T, GS_POINTLIST) \
|
||||
InitConvertVertex2(T, GS_LINELIST) \
|
||||
InitConvertVertex2(T, GS_LINESTRIP) \
|
||||
InitConvertVertex2(T, GS_TRIANGLELIST) \
|
||||
InitConvertVertex2(T, GS_TRIANGLESTRIP) \
|
||||
InitConvertVertex2(T, GS_TRIANGLEFAN) \
|
||||
InitConvertVertex2(T, GS_SPRITE) \
|
||||
InitConvertVertex2(T, GS_INVALID) \
|
||||
|
||||
void UpdateContext();
|
||||
void UpdateScissor();
|
||||
|
||||
|
@ -189,7 +167,7 @@ protected:
|
|||
|
||||
// following functions need m_vt to be initialized
|
||||
|
||||
GSVertexTrace* m_vt;
|
||||
GSVertexTrace m_vt;
|
||||
|
||||
void GetTextureMinMax(GSVector4i& r, const GIFRegTEX0& TEX0, const GIFRegCLAMP& CLAMP, bool linear);
|
||||
void GetAlphaMinMax();
|
||||
|
@ -213,7 +191,7 @@ public:
|
|||
bool m_nativeres;
|
||||
|
||||
public:
|
||||
GSState(GSVertexTrace* vt, size_t vertex_stride);
|
||||
GSState();
|
||||
virtual ~GSState();
|
||||
|
||||
void ResetHandlers();
|
||||
|
|
|
@ -82,13 +82,6 @@ bool GSDevice11::CreateTextureFX()
|
|||
return true;
|
||||
}
|
||||
|
||||
void GSDevice11::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
|
||||
{
|
||||
IASetVertexBuffer(vertex, sizeof(GSVertexHW11), vertex_count);
|
||||
IASetIndexBuffer(index, index_count);
|
||||
IASetPrimitiveTopology((D3D11_PRIMITIVE_TOPOLOGY)prim);
|
||||
}
|
||||
|
||||
void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
{
|
||||
hash_map<uint32, GSVertexShader11 >::const_iterator i = m_vs.find(sel);
|
||||
|
@ -118,6 +111,7 @@ void GSDevice11::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
|||
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
|
|
|
@ -61,13 +61,6 @@ GSTexture* GSDevice9::CreateMskFix(uint32 size, uint32 msk, uint32 fix)
|
|||
return t;
|
||||
}
|
||||
|
||||
void GSDevice9::SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim)
|
||||
{
|
||||
IASetVertexBuffer(vertex, sizeof(GSVertexHW9), vertex_count);
|
||||
IASetIndexBuffer(index, index_count);
|
||||
IASetPrimitiveTopology((D3DPRIMITIVETYPE)prim);
|
||||
}
|
||||
|
||||
void GSDevice9::SetupVS(VSSelector sel, const VSConstantBuffer* cb)
|
||||
{
|
||||
hash_map<uint32, GSVertexShader9>::const_iterator i = m_vs.find(sel);
|
||||
|
|
|
@ -22,6 +22,46 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSVector.h"
|
||||
|
||||
// Byte-mask lookup table returned by GSVector4i::xff(n).
// Entry n (0..15) has 0xff in n + 1 bytes and zero in all remaining bytes.
// The bytes are filled starting at the low byte of the first constructor
// argument and continue into the following arguments, so entry 0 covers a
// single byte and entry 15 covers all sixteen.
const GSVector4i GSVector4i::m_xff[16] =
|
||||
{
|
||||
GSVector4i(0x000000ff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0000ffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00ffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x000000ff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x0000ffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0x00ffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00000000, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x000000ff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x0000ffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0x00ffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00000000),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x000000ff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x0000ffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0x00ffffff),
|
||||
GSVector4i(0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff),
|
||||
};
|
||||
|
||||
// Nibble-mask lookup table returned by GSVector4i::x0f(n).
// Entry n (0..15) has 0x0f in n + 1 bytes and zero in all remaining bytes,
// filled from the low byte of the first constructor argument upwards.
// Note that the index is "byte count minus one": entry 0 already keeps one
// byte, and there is no all-zero entry.
const GSVector4i GSVector4i::m_x0f[16] =
|
||||
{
|
||||
GSVector4i(0x0000000f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x00000f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x000f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000000, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0000000f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x00000f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x000f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000000, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000000),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0000000f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x00000f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x000f0f0f),
|
||||
GSVector4i(0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f, 0x0f0f0f0f),
|
||||
};
|
||||
|
||||
const GSVector4 GSVector4::m_ps0123(0.0f, 1.0f, 2.0f, 3.0f);
|
||||
const GSVector4 GSVector4::m_ps4567(4.0f, 5.0f, 6.0f, 7.0f);
|
||||
const GSVector4 GSVector4::m_half(0.5f);
|
||||
|
|
|
@ -79,6 +79,9 @@ class GSVector4;
|
|||
|
||||
__aligned(class, 16) GSVector4i
|
||||
{
|
||||
static const GSVector4i m_xff[16];
|
||||
static const GSVector4i m_x0f[16];
|
||||
|
||||
public:
|
||||
union
|
||||
{
|
||||
|
@ -2343,6 +2346,9 @@ public:
|
|||
__forceinline static GSVector4i xfff8(const GSVector4i& v) {return xffffffff(v).sll16( 3);}
|
||||
__forceinline static GSVector4i xfffc(const GSVector4i& v) {return xffffffff(v).sll16( 2);}
|
||||
__forceinline static GSVector4i xfffe(const GSVector4i& v) {return xffffffff(v).sll16( 1);}
|
||||
|
||||
__forceinline static GSVector4i xff(int n) {return m_xff[n];}
|
||||
__forceinline static GSVector4i x0f(int n) {return m_x0f[n];}
|
||||
};
|
||||
|
||||
__aligned(class, 16) GSVector4
|
||||
|
|
|
@ -37,7 +37,8 @@ __aligned(struct, 32) GSVertex
|
|||
GIFRegST ST;
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
GIFRegXYZ XYZ;
|
||||
uint32 UV, FOG;
|
||||
union {uint32 UV; struct {uint16 U, V;};};
|
||||
uint32 FOG;
|
||||
};
|
||||
|
||||
__m128i m[2];
|
||||
|
|
|
@ -37,45 +37,4 @@ __aligned(struct, 32) GSVertexHW9
|
|||
GSVertexHW9& operator = (GSVertexHW9& v) {t = v.t; p = v.p; return *this;}
|
||||
};
|
||||
|
||||
__aligned(union, 32) GSVertexHW11
|
||||
{
|
||||
struct
|
||||
{
|
||||
union
|
||||
{
|
||||
struct {float x, y;} t;
|
||||
GIFRegST ST;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
union {struct {uint8 r, g, b, a; float q;}; uint32 c0;};
|
||||
GIFRegRGBAQ RGBAQ;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
struct {union {struct {uint16 x, y;}; uint32 xy;}; uint32 z;} p;
|
||||
GIFRegXYZ XYZ;
|
||||
};
|
||||
|
||||
union
|
||||
{
|
||||
struct {uint32 _pad; union {struct {uint8 ta0, ta1, res, f;}; uint32 c1;};};
|
||||
GIFRegFOG FOG;
|
||||
};
|
||||
};
|
||||
|
||||
GSVertexHW11& operator = (GSVertexHW11& v)
|
||||
{
|
||||
GSVector4i* RESTRICT src = (GSVector4i*)&v;
|
||||
GSVector4i* RESTRICT dst = (GSVector4i*)this;
|
||||
|
||||
dst[0] = src[0];
|
||||
dst[1] = src[1];
|
||||
|
||||
return *this;
|
||||
}
|
||||
};
|
||||
|
||||
#pragma pack(pop)
|
||||
|
|
|
@ -29,10 +29,38 @@ const GSVector4 GSVertexTrace::s_minmax(FLT_MAX, -FLT_MAX);
|
|||
GSVertexTrace::GSVertexTrace(const GSState* state)
|
||||
: m_state(state)
|
||||
{
|
||||
#define InitUpdate3(P, IIP, TME, FST, COLOR) \
|
||||
m_fmm[COLOR][FST][TME][IIP][P] = &GSVertexTrace::FindMinMax<P, IIP, TME, FST, COLOR>;
|
||||
|
||||
#define InitUpdate2(P, IIP, TME) \
|
||||
InitUpdate3(P, IIP, TME, 0, 0) \
|
||||
InitUpdate3(P, IIP, TME, 0, 1) \
|
||||
InitUpdate3(P, IIP, TME, 1, 0) \
|
||||
InitUpdate3(P, IIP, TME, 1, 1) \
|
||||
|
||||
#define InitUpdate(P) \
|
||||
InitUpdate2(P, 0, 0) \
|
||||
InitUpdate2(P, 0, 1) \
|
||||
InitUpdate2(P, 1, 0) \
|
||||
InitUpdate2(P, 1, 1) \
|
||||
|
||||
InitUpdate(GS_POINT_CLASS);
|
||||
InitUpdate(GS_LINE_CLASS);
|
||||
InitUpdate(GS_TRIANGLE_CLASS);
|
||||
InitUpdate(GS_SPRITE_CLASS);
|
||||
}
|
||||
|
||||
void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_primclass = primclass;
|
||||
|
||||
uint32 iip = m_state->PRIM->IIP;
|
||||
uint32 tme = m_state->PRIM->TME;
|
||||
uint32 fst = m_state->PRIM->FST;
|
||||
uint32 color = !(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC);
|
||||
|
||||
(this->*m_fmm[color][fst][tme][iip][primclass])(vertex, index, count);
|
||||
|
||||
m_eq.value = (m_min.c == m_max.c).mask() | ((m_min.p == m_max.p).mask() << 16) | ((m_min.t == m_max.t).mask() << 20);
|
||||
|
||||
m_alpha.valid = false;
|
||||
|
@ -82,90 +110,108 @@ void GSVertexTrace::Update(const void* vertex, const uint32* index, int count, G
|
|||
}
|
||||
}
|
||||
|
||||
uint32 GSVertexTrace::Hash(GS_PRIM_CLASS primclass)
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int count)
|
||||
{
|
||||
m_primclass = primclass;
|
||||
|
||||
uint32 hash = m_primclass | (m_state->PRIM->IIP << 2) | (m_state->PRIM->TME << 3) | (m_state->PRIM->FST << 4);
|
||||
|
||||
if(!(m_state->PRIM->TME && m_state->m_context->TEX0.TFX == TFX_DECAL && m_state->m_context->TEX0.TCC))
|
||||
{
|
||||
hash |= 1 << 5;
|
||||
}
|
||||
|
||||
return hash;
|
||||
}
|
||||
|
||||
GSVertexTraceSW::GSVertexTraceSW(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceSW", NULL)
|
||||
{
|
||||
}
|
||||
|
||||
void GSVertexTraceSW::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
|
||||
GSVertexTraceDX9::GSVertexTraceDX9(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceHW9", NULL)
|
||||
{
|
||||
}
|
||||
|
||||
void GSVertexTraceDX9::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
||||
GSVector4 o(context->XYOFFSET);
|
||||
GSVector4 s(1.0f / 16, 1.0f / 16, 1.0f, 1.0f);
|
||||
bool sprite = primclass == GS_SPRITE_CLASS;
|
||||
|
||||
m_min.p = (m_min.p - o) * s;
|
||||
m_max.p = (m_max.p - o) * s;
|
||||
int n = 1;
|
||||
|
||||
if(m_state->PRIM->TME)
|
||||
switch(primclass)
|
||||
{
|
||||
if(m_state->PRIM->FST)
|
||||
{
|
||||
s = GSVector4(1 << (16 - 4), 1).xxyy();
|
||||
}
|
||||
else
|
||||
{
|
||||
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
|
||||
}
|
||||
|
||||
m_min.t *= s;
|
||||
m_max.t *= s;
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
}
|
||||
GSVector4 pmin = s_minmax.xxxx();
|
||||
GSVector4 pmax = s_minmax.yyyy();
|
||||
GSVector4 tmin = s_minmax.xxxx();
|
||||
GSVector4 tmax = s_minmax.yyyy();
|
||||
GSVector4i cmin = GSVector4i::xffffffff();
|
||||
GSVector4i cmax = GSVector4i::zero();
|
||||
|
||||
GSVertexTraceDX11::GSVertexTraceDX11(const GSState* state)
|
||||
: GSVertexTrace(state)
|
||||
, m_map("VertexTraceHW11", NULL)
|
||||
{
|
||||
}
|
||||
const GSVertex* RESTRICT v = (GSVertex*)vertex;
|
||||
|
||||
void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass)
|
||||
{
|
||||
m_map[Hash(primclass)](count, vertex, index, m_min, m_max);
|
||||
for(int i = 0; i < count; i += n)
|
||||
{
|
||||
GSVector4 q;
|
||||
GSVector4i f;
|
||||
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
if(sprite)
|
||||
{
|
||||
if(tme && !fst)
|
||||
{
|
||||
q = GSVector4::load<true>(&v[index[i + 1]]).wwww();
|
||||
}
|
||||
|
||||
f = GSVector4i(v[index[i + 1]].m[1]).wwww();
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
GSVector4i c(v[index[i + j]].m[0]);
|
||||
|
||||
if(color && (iip || j == n - 1)) // TODO: unroll, to avoid j == n - 1
|
||||
{
|
||||
cmin = cmin.min_u8(c);
|
||||
cmax = cmax.max_u8(c);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq = GSVector4::cast(c);
|
||||
|
||||
GSVector4 q2 = !sprite ? stq.wwww() : q;
|
||||
|
||||
stq = (stq.xyww() * q2.rcpnr()).xyww(q2);
|
||||
|
||||
tmin = tmin.min(stq);
|
||||
tmax = tmax.max(stq);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv(v[index[i + j]].m[1]);
|
||||
|
||||
GSVector4 st = GSVector4(uv.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st);
|
||||
tmax = tmax.max(st);
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf(v[index[i + j]].m[1]);
|
||||
|
||||
GSVector4i xy = xyzf.upl16();
|
||||
GSVector4i z = xyzf.yyyy().srl32(1);
|
||||
|
||||
GSVector4 p = GSVector4(xy.upl64(z.upl32(!sprite ? xyzf.wwww() : f)));
|
||||
|
||||
pmin = pmin.min(p);
|
||||
pmax = pmax.max(p);
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4 o(context->XYOFFSET);
|
||||
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);
|
||||
|
||||
m_min.p = (m_min.p - o) * s;
|
||||
m_max.p = (m_max.p - o) * s;
|
||||
m_min.p = (pmin - o) * s;
|
||||
m_max.p = (pmax - o) * s;
|
||||
|
||||
if(m_state->PRIM->TME)
|
||||
if(tme)
|
||||
{
|
||||
if(m_state->PRIM->FST)
|
||||
if(fst)
|
||||
{
|
||||
s = GSVector4(1 << (16 - 4), 1).xxyy();
|
||||
}
|
||||
|
@ -174,10 +220,13 @@ void GSVertexTraceDX11::Update(const void* vertex, const uint32* index, int coun
|
|||
s = GSVector4(0x10000 << context->TEX0.TW, 0x10000 << context->TEX0.TH, 1, 1);
|
||||
}
|
||||
|
||||
m_min.t *= s;
|
||||
m_max.t *= s;
|
||||
m_min.t = tmin * s;
|
||||
m_max.t = tmax * s;
|
||||
}
|
||||
|
||||
GSVertexTrace::Update(vertex, index, count, primclass);
|
||||
if(color)
|
||||
{
|
||||
m_min.c = cmin.zzzz().u8to32();
|
||||
m_max.c = cmax.zzzz().u8to32();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -38,12 +38,15 @@ public:
|
|||
protected:
|
||||
const GSState* m_state;
|
||||
|
||||
uint32 Hash(GS_PRIM_CLASS primclass);
|
||||
|
||||
typedef void (*VertexTracePtr)(int count, const void* vertex, const uint32* index, Vertex& min, Vertex& max);
|
||||
|
||||
static const GSVector4 s_minmax;
|
||||
|
||||
typedef void (GSVertexTrace::*FindMinMaxPtr)(const void* vertex, const uint32* index, int count);
|
||||
|
||||
FindMinMaxPtr m_fmm[2][2][2][2][4];
|
||||
|
||||
template<GS_PRIM_CLASS primclass, uint32 iip, uint32 tme, uint32 fst, uint32 color>
|
||||
void FindMinMax(const void* vertex, const uint32* index, int count);
|
||||
|
||||
public:
|
||||
GS_PRIM_CLASS m_primclass;
|
||||
|
||||
|
@ -69,55 +72,7 @@ public:
|
|||
GSVertexTrace(const GSState* state);
|
||||
virtual ~GSVertexTrace() {}
|
||||
|
||||
virtual void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
|
||||
bool IsLinear() const {return m_filter.linear;}
|
||||
};
|
||||
|
||||
__aligned(class, 32) GSVertexTraceSW : public GSVertexTrace
|
||||
{
|
||||
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceSW(const GSState* state);
|
||||
|
||||
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
||||
__aligned(class, 32) GSVertexTraceDX9 : public GSVertexTrace
|
||||
{
|
||||
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceDX9(const GSState* state);
|
||||
|
||||
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
||||
__aligned(class, 32) GSVertexTraceDX11 : public GSVertexTrace
|
||||
{
|
||||
class CG : public GSCodeGenerator
|
||||
{
|
||||
public:
|
||||
CG(const void* param, uint32 key, void* code, size_t maxsize);
|
||||
};
|
||||
|
||||
GSCodeGeneratorFunctionMap<CG, uint32, VertexTracePtr> m_map;
|
||||
|
||||
public:
|
||||
GSVertexTraceDX11(const GSState* state);
|
||||
|
||||
void Update(const void* vertex, const uint32* index, int count, GS_PRIM_CLASS primclass);
|
||||
};
|
||||
|
|
|
@ -1,496 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE >= 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
vbroadcastss(xmm4, ptr[rax + 0]);
|
||||
vbroadcastss(xmm5, ptr[rax + 4]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = FLT_MAX;
|
||||
// max.c = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm2, xmm4);
|
||||
vmovaps(xmm3, xmm5);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
vmovaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
|
||||
|
||||
vminps(xmm2, xmm0);
|
||||
vmaxps(xmm3, xmm0);
|
||||
}
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
if(primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
vdivps(xmm0, xmm1);
|
||||
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
}
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(rdx, n * sizeof(GSVertexSW));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
vcvttps2dq(xmm2, xmm2);
|
||||
vpsrld(xmm2, 7);
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
|
||||
vcvttps2dq(xmm3, xmm3);
|
||||
vpsrld(xmm3, 7);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 6;
|
||||
break;
|
||||
}
|
||||
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
vbroadcastss(xmm4, ptr[rax + 0]);
|
||||
vbroadcastss(xmm5, ptr[rax + 4]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
vmovaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
||||
if(tme && !fst && primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.min_u8(v[i + j].c);
|
||||
// max.c = max.c.min_u8(v[i + j].c);
|
||||
|
||||
vpminub(xmm2, xmm0);
|
||||
vpmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// t /= p.wwww();
|
||||
|
||||
vdivps(xmm0, xmm1);
|
||||
}
|
||||
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(rdx, n * sizeof(GSVertexHW9));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm2, xmm2);
|
||||
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm3, xmm3);
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin.xyww(pmin);
|
||||
// m_max.t = tmax.xyww(pmax);
|
||||
|
||||
vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
vmovdqa(ptr[rsp + 0], xmm6);
|
||||
vmovdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
vbroadcastss(xmm4, ptr[rax + 0]);
|
||||
vbroadcastss(xmm5, ptr[rax + 4]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
vpminub(xmm2, xmm0);
|
||||
vpmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
vmovaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
vdivps(xmm0, xmm1);
|
||||
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
vmovdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]);
|
||||
vpmovzxwd(xmm1, xmm0);
|
||||
|
||||
vpsrld(xmm0, 1);
|
||||
vpunpcklqdq(xmm1, xmm0);
|
||||
vcvtdq2ps(xmm1, xmm1);
|
||||
|
||||
vminps(xmm4, xmm1);
|
||||
vmaxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
add(rdx, n * sizeof(GSVertexHW11));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm2, xmm2);
|
||||
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm3, xmm3);
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
vmovaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
vmovdqa(xmm6, ptr[rsp + 0]);
|
||||
vmovdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,543 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE < 0x500 && (defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
GSVertexTrace::CGSW::CGSW(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
movss(xmm4, ptr[rax + 0]);
|
||||
movss(xmm5, ptr[rax + 4]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = FLT_MAX;
|
||||
// max.c = -FLT_MAX;
|
||||
|
||||
movaps(xmm2, xmm4);
|
||||
movaps(xmm3, xmm5);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, ptr[rdx + 1 * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, c)]);
|
||||
|
||||
minps(xmm2, xmm0);
|
||||
maxps(xmm3, xmm0);
|
||||
}
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexSW) + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
if(primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(rdx, n * sizeof(GSVertexSW));
|
||||
sub(rcx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
cvttps2dq(xmm2, xmm2);
|
||||
psrld(xmm2, 7);
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
psrld(xmm3, 7);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
GSVertexTrace::CGHW9::CGHW9(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 6;
|
||||
break;
|
||||
}
|
||||
|
||||
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
movss(xmm4, ptr[rax + 0]);
|
||||
movss(xmm5, ptr[rax + 16]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, ptr[rdx + 5 * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
||||
if(tme && !fst && primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW9) + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.min_u8(v[i + j].c);
|
||||
// max.c = max.c.min_u8(v[i + j].c);
|
||||
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// t /= p.wwww();
|
||||
|
||||
divps(xmm0, xmm1);
|
||||
}
|
||||
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
add(rdx, n * sizeof(GSVertexHW9));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin.xyww(pmin);
|
||||
// m_max.t = tmax.xyww(pmax);
|
||||
|
||||
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Emits the x64 SSE routine that computes per-component minimum and maximum of
// position, texture coordinate and color over an array of GSVertexHW11 vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code rdx is the vertex pointer (advanced n vertices per iteration),
// ecx is the remaining vertex count, and r8 / r9 point at the GSVertexTrace::Vertex
// records that receive the minimum / maximum.
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTrace::CGHW11::CGHW11(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of vertices consumed per loop iteration (vertices per primitive).
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// Prologue: reserve 40 bytes of stack and spill xmm6/xmm7, which are reloaded before ret().
// NOTE(review): presumably required because the Win64 ABI treats xmm6/xmm7 as callee-saved - confirm.
sub(rsp, 8 + 2 * 16);
|
||||
|
||||
movdqa(ptr[rsp + 0], xmm6);
|
||||
movdqa(ptr[rsp + 16], xmm7);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
mov(rax, (size_t)&s_minmax);
|
||||
|
||||
// Load the two seed scalars (at byte offsets 0 and 16 of s_minmax) and broadcast each to all four lanes.
movss(xmm4, ptr[rax + 0]);
|
||||
movss(xmm5, ptr[rax + 16]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per vertex of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// The first 16 bytes of the vertex are only loaded when color or texcoord tracking needs them.
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[rdx + j * sizeof(GSVertexHW11)]);
|
||||
}
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
// The byte-wise min/max is applied to the whole register; the color dword is picked out after the loop.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// Keep an untouched copy so lane 3 (q) can be broadcast as the divisor.
if(!fst)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
// Divide by q, then put q back into the two upper lanes so it is range-tracked as well.
if(!fst)
|
||||
{
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
// Second 16 bytes of the vertex, read as integers.
// NOTE(review): presumably 16-bit x/y followed by a 32-bit z - confirm against GSVertexHW11.
movdqa(xmm0, ptr[rdx + j * sizeof(GSVertexHW11) + 16]);
|
||||
|
||||
// Zero-extend the low four 16-bit words to dwords (SSE4.1 instruction, or unpack + shift as a fallback).
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pmovzxwd(xmm1, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
movdqa(xmm1, xmm0);
|
||||
punpcklwd(xmm1, xmm1);
|
||||
psrld(xmm1, 16);
|
||||
}
|
||||
|
||||
// Halve every dword, move the halved low qword into the upper half of xmm1, and convert all lanes to float.
// Lanes 0/1 then hold the two zero-extended words, lane 3 holds (second dword >> 1).
psrld(xmm0, 1);
|
||||
punpcklqdq(xmm1, xmm0);
|
||||
cvtdq2ps(xmm1, xmm1);
|
||||
|
||||
minps(xmm4, xmm1);
|
||||
maxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
// Advance to the next primitive; loop while the remaining count is still positive.
add(rdx, n * sizeof(GSVertexHW11));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
// Expand the four bytes of dword 2 into four 32-bit lanes.
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback: interleave the high 8 bytes with zero, then the low 4 resulting words with zero.
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
movaps(ptr[r8 + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[r9 + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
// Epilogue: reload the spilled xmm6/xmm7 and release the stack space reserved in the prologue.
movdqa(xmm6, ptr[rsp + 0]);
|
||||
movdqa(xmm7, ptr[rsp + 16]);
|
||||
|
||||
add(rsp, 8 + 2 * 16);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,513 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE >= 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Byte offsets from esp at which the generated x86 routines in this file read their stack arguments.
// NOTE(review): _args = 4 presumably accounts for the push(ebx) every generator emits before
// reading them, and the register names in the trailing comments look like leftovers from the
// x64 variant - confirm against the caller's signature.
static const int _args = 4;
|
||||
static const int _count = _args + 4; // rcx
|
||||
static const int _vertex = _args + 8; // rdx
|
||||
static const int _index = _args + 12; // r8
|
||||
static const int _min = _args + 16; // r9
|
||||
static const int _max = _args + 20; // _args + 4
|
||||
|
||||
// Emits the x86 AVX routine that computes per-component minimum and maximum of
// color, position and texture coordinate over indexed GSVertexSW vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code edx is the vertex array base, ebx walks the 32-bit index list,
// ecx is the remaining index count and eax is scratch (later the min destination).
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per loop iteration (vertices per primitive).
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx is used as the index cursor below; it is saved here and restored before ret().
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&s_minmax.x]);
|
||||
vbroadcastss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = FLT_MAX;
|
||||
// max.c = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm2, xmm4);
|
||||
vmovaps(xmm3, xmm5);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
// Fetch the stack arguments: vertex array, index list and count.
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// Sprites: the divisor (lane 2 of t) is taken once per primitive from the vertex referenced by the second index.
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per vertex of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by the j-th index.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
|
||||
|
||||
vminps(xmm2, xmm0);
|
||||
vmaxps(xmm3, xmm0);
|
||||
}
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// Non-sprites use this vertex's own lane 2 of t as the divisor.
if(primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// Divide all lanes by the broadcast divisor, then take the two upper lanes back from the divisor register.
vdivps(xmm0, xmm1);
|
||||
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
}
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the next primitive's indices; loop while the remaining count is still positive.
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// Fetch the output pointers: eax = min record, edx = max record.
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// Convert the float color range to integers (truncating) and shift right by 7 before storing.
vcvttps2dq(xmm2, xmm2);
|
||||
vpsrld(xmm2, 7);
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
|
||||
vcvttps2dq(xmm3, xmm3);
|
||||
vpsrld(xmm3, 7);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Emits the x86 AVX routine that computes per-component minimum and maximum of
// position, texture coordinate and color over indexed GSVertexHW9 vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code edx is the vertex array base, ebx walks the 32-bit index list,
// ecx is the remaining index count and eax is scratch (later the min destination).
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per loop iteration (vertices per primitive).
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx is used as the index cursor below; it is saved here and restored before ret().
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&s_minmax.x]);
|
||||
vbroadcastss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
// Fetch the stack arguments: vertex array, index list and count.
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// Sprites: the divisor (p.w) is taken once per primitive from the vertex referenced by the second index.
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
vmovaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per vertex of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by the j-th index.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
vminps(xmm4, xmm0);
|
||||
vmaxps(xmm5, xmm0);
|
||||
|
||||
// Non-sprites use this vertex's own p.w as the divisor.
if(tme && !fst && primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
vshufps(xmm1, xmm0, xmm0, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// The t field is loaded once and serves both the byte-wise color range and the texcoord range.
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.min_u8(v[i + j].c);
|
||||
// max.c = max.c.max_u8(v[i + j].c);
|
||||
|
||||
vpminub(xmm2, xmm0);
|
||||
vpmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// t /= p.wwww();
|
||||
|
||||
vdivps(xmm0, xmm1);
|
||||
}
|
||||
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the next primitive's indices; loop while the remaining count is still positive.
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// Fetch the output pointers: eax = min record, edx = max record.
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
// Expand the four bytes of dword 2 into four 32-bit lanes.
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm2, xmm2);
|
||||
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm3, xmm3);
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin.xyww(pmin);
|
||||
// m_max.t = tmax.xyww(pmax);
|
||||
|
||||
vshufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
vshufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Emits the x86 AVX routine that computes per-component minimum and maximum of
// position, texture coordinate and color over indexed GSVertexHW11 vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code edx is the vertex array base, ebx walks the 32-bit index list,
// ecx is the remaining index count and eax is scratch (later the min destination).
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per loop iteration (vertices per primitive).
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx is used as the index cursor below; it is saved here and restored before ret().
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
vbroadcastss(xmm4, ptr[&s_minmax.x]);
|
||||
vbroadcastss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
vpcmpeqd(xmm2, xmm2);
|
||||
vpxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
vmovaps(xmm6, xmm4);
|
||||
vmovaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
// Fetch the stack arguments: vertex array, index list and count.
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per vertex of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by the j-th index.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW11)
|
||||
|
||||
// The first 16 bytes of the vertex are only loaded when color or texcoord tracking needs them.
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
vmovaps(xmm0, ptr[edx + eax]);
|
||||
}
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
// The byte-wise min/max is applied to the whole register; the color dword is picked out after the loop.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
vpminub(xmm2, xmm0);
|
||||
vpmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// Keep an untouched copy so lane 3 (q) can be broadcast as the divisor.
if(!fst)
|
||||
{
|
||||
vmovaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
vshufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
// Divide by q, then put q back into the two upper lanes so it is range-tracked as well.
if(!fst)
|
||||
{
|
||||
vshufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
vdivps(xmm0, xmm1);
|
||||
vshufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
vminps(xmm6, xmm0);
|
||||
vmaxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
// Second 16 bytes of the vertex, read as integers; the low four 16-bit words are zero-extended to dwords.
// NOTE(review): presumably 16-bit x/y followed by a 32-bit z - confirm against GSVertexHW11.
vmovdqa(xmm0, ptr[edx + eax + 16]);
|
||||
vpmovzxwd(xmm1, xmm0);
|
||||
|
||||
// Halve every dword, move the halved low qword into the upper half of xmm1, and convert all lanes to float.
// Lanes 0/1 then hold the two zero-extended words, lane 3 holds (second dword >> 1).
vpsrld(xmm0, 1);
|
||||
vpunpcklqdq(xmm1, xmm0);
|
||||
vcvtdq2ps(xmm1, xmm1);
|
||||
|
||||
vminps(xmm4, xmm1);
|
||||
vmaxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
// Advance to the next primitive's indices; loop while the remaining count is still positive.
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// Fetch the output pointers: eax = min record, edx = max record.
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
// Expand the four bytes of dword 2 into four 32-bit lanes.
vpshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm2, xmm2);
|
||||
|
||||
vpshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
vpmovzxbd(xmm3, xmm3);
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
vshufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
vshufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
vmovaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
vmovaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,562 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSVertexTrace.h"
|
||||
|
||||
#if _M_SSE < 0x500 && !(defined(_M_AMD64) || defined(_WIN64))
|
||||
|
||||
using namespace Xbyak;
|
||||
|
||||
// Byte offsets from esp at which the generated x86 routines in this file read their stack arguments.
// NOTE(review): _args = 4 presumably accounts for the push(ebx) every generator emits before
// reading them, and the register names in the trailing comments look like leftovers from the
// x64 variant - confirm against the caller's signature.
static const int _args = 4;
|
||||
static const int _count = _args + 4; // rcx
|
||||
static const int _vertex = _args + 8; // rdx
|
||||
static const int _index = _args + 12; // r8
|
||||
static const int _min = _args + 16; // r9
|
||||
static const int _max = _args + 20; // _args + 4
|
||||
|
||||
// Emits the x86 SSE routine that computes per-component minimum and maximum of
// color, position and texture coordinate over indexed GSVertexSW vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code edx is the vertex array base, ebx walks the 32-bit index list,
// ecx is the remaining index count and eax is scratch (later the min destination).
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTraceSW::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per loop iteration (vertices per primitive).
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx is used as the index cursor below; it is saved here and restored before ret().
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// Load the two seed scalars and broadcast each to all four lanes.
movss(xmm4, ptr[&s_minmax.x]);
|
||||
movss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = FLT_MAX;
|
||||
// max.c = -FLT_MAX;
|
||||
|
||||
movaps(xmm2, xmm4);
|
||||
movaps(xmm3, xmm5);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
// Fetch the stack arguments: vertex array, index list and count.
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// Sprites: the divisor (lane 2 of t) is taken once per primitive from the vertex referenced by the second index.
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per vertex of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by the j-th index.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 6); // * sizeof(GSVertexSW)
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.minv(v[i + j].c);
|
||||
// max.c = max.c.maxv(v[i + j].c);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, c)]);
|
||||
|
||||
minps(xmm2, xmm0);
|
||||
maxps(xmm3, xmm0);
|
||||
}
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexSW, t)]);
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// Non-sprites use this vertex's own lane 2 of t as the divisor.
if(primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
}
|
||||
|
||||
// Divide all lanes by the broadcast divisor, then take the two upper lanes back from the divisor register.
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 2, 1, 0));
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the next primitive's indices; loop while the remaining count is still positive.
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// Fetch the output pointers: eax = min record, edx = max record.
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// Convert the float color range to integers (truncating) and shift right by 7 before storing.
cvttps2dq(xmm2, xmm2);
|
||||
psrld(xmm2, 7);
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
|
||||
cvttps2dq(xmm3, xmm3);
|
||||
psrld(xmm3, 7);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Emits the x86 SSE routine that computes per-component minimum and maximum of
// position, texture coordinate and color over indexed GSVertexHW9 vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code edx is the vertex array base, ebx walks the 32-bit index list,
// ecx is the remaining index count and eax is scratch (later the min destination).
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTraceDX9::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per loop iteration.
// NOTE(review): sprites use n = 6 here, whereas the AVX build of this generator uses n = 2;
// the sprite divisor below is still read through index 1 - confirm which grouping matches the index layout.
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 6;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx is used as the index cursor below; it is saved here and restored before ret().
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// Load the two seed scalars and broadcast each to all four lanes.
movss(xmm4, ptr[&s_minmax.x]);
|
||||
movss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
// Fetch the stack arguments: vertex array, index list and count.
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// Sprites: the divisor (p.w) is taken once per primitive from the vertex referenced by the second index.
if(tme && !fst && primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
mov(eax, ptr[ebx + 1 * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
movaps(xmm1, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per index of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by the j-th index.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW9)
|
||||
|
||||
// min.p = min.p.minv(v[i + j].p);
|
||||
// max.p = max.p.maxv(v[i + j].p);
|
||||
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, p)]);
|
||||
|
||||
minps(xmm4, xmm0);
|
||||
maxps(xmm5, xmm0);
|
||||
|
||||
// Non-sprites use this vertex's own p.w as the divisor.
if(tme && !fst && primclass != GS_SPRITE_CLASS)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
}
|
||||
|
||||
// The t field is loaded once and serves both the byte-wise color range and the texcoord range.
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[edx + eax + offsetof(GSVertexHW9, t)]);
|
||||
}
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
// min.c = min.c.min_u8(v[i + j].c);
|
||||
// max.c = max.c.max_u8(v[i + j].c);
|
||||
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(1, 0, 1, 0)); // avoid FP assist, high part is integral
|
||||
|
||||
if(!fst)
|
||||
{
|
||||
// t /= p.wwww();
|
||||
|
||||
divps(xmm0, xmm1);
|
||||
}
|
||||
|
||||
// min.t = min.t.minv(v[i + j].t);
|
||||
// max.t = max.t.maxv(v[i + j].t);
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
}
|
||||
|
||||
// Advance to the next primitive's indices; loop while the remaining count is still positive.
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// Fetch the output pointers: eax = min record, edx = max record.
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
// Expand the four bytes of dword 2 into four 32-bit lanes.
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback: interleave the high 8 bytes with zero, then the low 4 resulting words with zero.
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin;
|
||||
// m_max.p = pmax;
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin.xyww(pmin);
|
||||
// m_max.t = tmax.xyww(pmax);
|
||||
|
||||
shufps(xmm6, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm7, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
// Emits the x86 SSE routine that computes per-component minimum and maximum of
// position, texture coordinate and color over indexed GSVertexHW11 vertices.
// key layout: bits 0-1 = primitive class, bit 2 = iip, bit 3 = tme, bit 4 = fst, bit 5 = color.
// In the emitted code edx is the vertex array base, ebx walks the 32-bit index list,
// ecx is the remaining index count and eax is scratch (later the min destination).
// Accumulators: xmm2/xmm3 = color min/max, xmm4/xmm5 = position min/max,
// xmm6/xmm7 = texcoord min/max; xmm0/xmm1 are scratch.
// NOTE(review): param is not referenced anywhere in this constructor body.
GSVertexTraceDX11::CG::CG(const void* param, uint32 key, void* code, size_t maxsize)
|
||||
: GSCodeGenerator(code, maxsize)
|
||||
{
|
||||
// Unpack the specialization flags from the key.
uint32 primclass = (key >> 0) & 3;
|
||||
uint32 iip = (key >> 2) & 1;
|
||||
uint32 tme = (key >> 3) & 1;
|
||||
uint32 fst = (key >> 4) & 1;
|
||||
uint32 color = (key >> 5) & 1;
|
||||
|
||||
// n = number of indices consumed per loop iteration (vertices per primitive).
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
n = 1;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
n = 2;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
n = 3;
|
||||
break;
|
||||
}
|
||||
|
||||
// ebx is used as the index cursor below; it is saved here and restored before ret().
push(ebx);
|
||||
|
||||
// min.p = FLT_MAX;
|
||||
// max.p = -FLT_MAX;
|
||||
|
||||
// Load the two seed scalars and broadcast each to all four lanes.
movss(xmm4, ptr[&s_minmax.x]);
|
||||
movss(xmm5, ptr[&s_minmax.y]);
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(0, 0, 0, 0));
|
||||
|
||||
if(color)
|
||||
{
|
||||
// min.c = 0xffffffff;
|
||||
// max.c = 0;
|
||||
|
||||
pcmpeqd(xmm2, xmm2);
|
||||
pxor(xmm3, xmm3);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// min.t = FLT_MAX;
|
||||
// max.t = -FLT_MAX;
|
||||
|
||||
movaps(xmm6, xmm4);
|
||||
movaps(xmm7, xmm5);
|
||||
}
|
||||
|
||||
// for(int i = 0; i < count; i += step) {
|
||||
|
||||
// Fetch the stack arguments: vertex array, index list and count.
mov(edx, dword[esp + _vertex]);
|
||||
mov(ebx, dword[esp + _index]);
|
||||
mov(ecx, dword[esp + _count]);
|
||||
|
||||
align(16);
|
||||
|
||||
L("loop");
|
||||
|
||||
// The C++ loop below is unrolled at generation time: one copy of the body is emitted per vertex of the primitive.
for(int j = 0; j < n; j++)
|
||||
{
|
||||
// eax = byte offset of the vertex selected by the j-th index.
mov(eax, ptr[ebx + j * sizeof(uint32)]);
|
||||
shl(eax, 5); // * sizeof(GSVertexHW11)
|
||||
|
||||
// The first 16 bytes of the vertex are only loaded when color or texcoord tracking needs them.
if(color && (iip || j == n - 1) || tme)
|
||||
{
|
||||
movaps(xmm0, ptr[edx + eax]);
|
||||
}
|
||||
|
||||
// Without iip only the last vertex of each primitive contributes to the color range.
// The byte-wise min/max is applied to the whole register; the color dword is picked out after the loop.
if(color && (iip || j == n - 1))
|
||||
{
|
||||
pminub(xmm2, xmm0);
|
||||
pmaxub(xmm3, xmm0);
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// Keep an untouched copy so lane 3 (q) can be broadcast as the divisor.
if(!fst)
|
||||
{
|
||||
movaps(xmm1, xmm0);
|
||||
}
|
||||
|
||||
shufps(xmm0, xmm0, _MM_SHUFFLE(3, 3, 1, 0)); // avoid FP assist, third dword is integral
|
||||
|
||||
// Divide by q, then put q back into the two upper lanes so it is range-tracked as well.
if(!fst)
|
||||
{
|
||||
shufps(xmm1, xmm1, _MM_SHUFFLE(3, 3, 3, 3));
|
||||
divps(xmm0, xmm1);
|
||||
shufps(xmm0, xmm1, _MM_SHUFFLE(3, 3, 1, 0)); // restore q
|
||||
}
|
||||
|
||||
minps(xmm6, xmm0);
|
||||
maxps(xmm7, xmm0);
|
||||
}
|
||||
|
||||
// Second 16 bytes of the vertex, read as integers.
// NOTE(review): presumably 16-bit x/y followed by a 32-bit z - confirm against GSVertexHW11.
movdqa(xmm0, ptr[edx + eax + 16]);
|
||||
|
||||
// Zero-extend the low four 16-bit words to dwords (SSE4.1 instruction, or unpack + shift as a fallback).
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pmovzxwd(xmm1, xmm0);
|
||||
}
|
||||
else
|
||||
{
|
||||
movdqa(xmm1, xmm0);
|
||||
punpcklwd(xmm1, xmm1);
|
||||
psrld(xmm1, 16);
|
||||
}
|
||||
|
||||
// Halve every dword, move the halved low qword into the upper half of xmm1, and convert all lanes to float.
// Lanes 0/1 then hold the two zero-extended words, lane 3 holds (second dword >> 1).
psrld(xmm0, 1);
|
||||
punpcklqdq(xmm1, xmm0);
|
||||
cvtdq2ps(xmm1, xmm1);
|
||||
|
||||
minps(xmm4, xmm1);
|
||||
maxps(xmm5, xmm1);
|
||||
}
|
||||
|
||||
// Advance to the next primitive's indices; loop while the remaining count is still positive.
add(ebx, n * sizeof(uint32));
|
||||
sub(ecx, n);
|
||||
|
||||
jg("loop");
|
||||
|
||||
// }
|
||||
|
||||
// Fetch the output pointers: eax = min record, edx = max record.
mov(eax, dword[esp + _min]);
|
||||
mov(edx, dword[esp + _max]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
// m_min.c = cmin.zzzz().u8to32();
|
||||
// m_max.c = cmax.zzzz().u8to32();
|
||||
|
||||
// Expand the four bytes of dword 2 into four 32-bit lanes.
if(m_cpu.has(util::Cpu::tSSE41))
|
||||
{
|
||||
pshufd(xmm2, xmm2, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm2, xmm2);
|
||||
|
||||
pshufd(xmm3, xmm3, _MM_SHUFFLE(2, 2, 2, 2));
|
||||
pmovzxbd(xmm3, xmm3);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fallback: interleave the high 8 bytes with zero, then the low 4 resulting words with zero.
pxor(xmm0, xmm0);
|
||||
|
||||
punpckhbw(xmm2, xmm0);
|
||||
punpcklwd(xmm2, xmm0);
|
||||
|
||||
punpckhbw(xmm3, xmm0);
|
||||
punpcklwd(xmm3, xmm0);
|
||||
}
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, c)], xmm2);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, c)], xmm3);
|
||||
}
|
||||
|
||||
// m_min.p = pmin.xyww();
|
||||
// m_max.p = pmax.xyww();
|
||||
|
||||
shufps(xmm4, xmm4, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
shufps(xmm5, xmm5, _MM_SHUFFLE(3, 3, 1, 0));
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, p)], xmm4);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, p)], xmm5);
|
||||
|
||||
if(tme)
|
||||
{
|
||||
// m_min.t = tmin;
|
||||
// m_max.t = tmax;
|
||||
|
||||
movaps(ptr[eax + offsetof(GSVertexTrace::Vertex, t)], xmm6);
|
||||
movaps(ptr[edx + offsetof(GSVertexTrace::Vertex, t)], xmm7);
|
||||
}
|
||||
|
||||
pop(ebx);
|
||||
|
||||
ret();
|
||||
}
|
||||
|
||||
#endif
|
|
@ -618,62 +618,6 @@
|
|||
<ClCompile Include="GSVertexList.cpp" />
|
||||
<ClCompile Include="GSVertexSW.cpp" />
|
||||
<ClCompile Include="GSVertexTrace.cpp" />
|
||||
<!-- GSVertexTrace.x64.avx.cpp: excluded from the SSE2/SSE4/SSSE3 x64 configurations and from every listed Win32 configuration (AVX, SSE2, SSE4, SSSE3). No exclusion is listed here for "Debug AVX|x64" or "Release AVX|x64". -->
<ClCompile Include="GSVertexTrace.x64.avx.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<!-- GSVertexTrace.x64.cpp: excluded from every listed Win32 configuration (AVX, SSE2, SSE4, SSSE3) and from the AVX x64 configurations. No exclusion is listed here for the SSE2/SSE4/SSSE3 x64 configurations. -->
<ClCompile Include="GSVertexTrace.x64.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<!-- GSVertexTrace.x86.avx.cpp: excluded from every listed x64 configuration (AVX, SSE2, SSE4, SSSE3) and from the SSE2/SSE4/SSSE3 Win32 configurations. No exclusion is listed here for "Debug AVX|Win32" or "Release AVX|Win32". -->
<ClCompile Include="GSVertexTrace.x86.avx.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<!-- GSVertexTrace.x86.cpp: excluded from every listed x64 configuration (AVX, SSE2, SSE4, SSSE3) and from the AVX Win32 configurations. No exclusion is listed here for the SSE2/SSE4/SSSE3 Win32 configurations. -->
<ClCompile Include="GSVertexTrace.x86.cpp">
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE2|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSE4|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release SSSE3|x64'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Debug AVX|Win32'">true</ExcludedFromBuild>
|
||||
<ExcludedFromBuild Condition="'$(Configuration)|$(Platform)'=='Release AVX|Win32'">true</ExcludedFromBuild>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSWnd.cpp" />
|
||||
<ClCompile Include="stdafx.cpp">
|
||||
<PrecompiledHeader Condition="'$(Configuration)|$(Platform)'=='Debug SSE2|Win32'">Create</PrecompiledHeader>
|
||||
|
|
|
@ -288,18 +288,6 @@
|
|||
<!-- Each ClCompile item in this run assigns its source file to the "Source Files" filter. -->
<ClCompile Include="GSDeviceSDL.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x64.avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x64.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x86.avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSVertexTrace.x86.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSSetupPrimCodeGenerator.x64.avx.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
|
|
@ -1244,110 +1244,6 @@
|
|||
RelativePath=".\GSVertexTrace.cpp"
|
||||
>
|
||||
</File>
|
||||
<!-- GSVertexTrace.x64.cpp: marked ExcludedFromBuild for the Debug and Release SSE2, SSSE3 and SSE4 Win32 configurations; each FileConfiguration carries an empty VCCLCompilerTool entry. -->
<File
|
||||
RelativePath=".\GSVertexTrace.x64.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE2|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE2|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSSE3|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSSE3|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE4|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE4|Win32"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<!-- GSVertexTrace.x86.cpp: marked ExcludedFromBuild for the Debug and Release SSE2, SSSE3 and SSE4 x64 configurations; each FileConfiguration carries an empty VCCLCompilerTool entry. -->
<File
|
||||
RelativePath=".\GSVertexTrace.x86.cpp"
|
||||
>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE2|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE2|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSSE3|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSSE3|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Debug SSE4|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
<FileConfiguration
|
||||
Name="Release SSE4|x64"
|
||||
ExcludedFromBuild="true"
|
||||
>
|
||||
<Tool
|
||||
Name="VCCLCompilerTool"
|
||||
/>
|
||||
</FileConfiguration>
|
||||
</File>
|
||||
<File
|
||||
RelativePath=".\GSWnd.cpp"
|
||||
>
|
||||
|
|
|
@ -40,11 +40,12 @@
|
|||
|
||||
// Vertex shader input: each member is bound to the HLSL semantic named after its colon.
// NOTE(review): as listed here, `q` and `c` are each declared twice and both `st` and `t`
// use TEXCOORD0. This looks like the pre-change and post-change member lists of a diff hunk
// shown together; confirm the actual member set against the real shader file.
struct VS_INPUT
|
||||
{
|
||||
float2 st : TEXCOORD0;
|
||||
float4 c : COLOR0;
|
||||
float q : TEXCOORD1;
|
||||
uint2 p : POSITION0;
|
||||
uint z : POSITION1;
|
||||
float2 t : TEXCOORD0;
|
||||
float q : TEXCOORD1;
|
||||
float4 c : COLOR0;
|
||||
uint2 uv : TEXCOORD2;
|
||||
float4 f : COLOR1;
|
||||
};
|
||||
|
||||
|
@ -602,12 +603,12 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||
{
|
||||
if(VS_FST)
|
||||
{
|
||||
output.t.xy = input.t * TextureScale;
|
||||
output.t.xy = input.uv * TextureScale;
|
||||
output.t.w = 1.0f;
|
||||
}
|
||||
else
|
||||
{
|
||||
output.t.xy = input.t;
|
||||
output.t.xy = input.st;
|
||||
output.t.w = input.q;
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue