mirror of https://github.com/PCSX2/pcsx2.git
GSdx: more fun with shaders but nothing works yet.
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5083 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
f47e261ade
commit
915a57d9f3
|
@ -218,12 +218,11 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
|
|||
return -1;
|
||||
}
|
||||
|
||||
if(s_gs == NULL)
|
||||
{
|
||||
s_gs = new GSRendererCS();
|
||||
delete s_gs;
|
||||
|
||||
s_renderer = renderer;
|
||||
}
|
||||
s_gs = new GSRendererCS();
|
||||
|
||||
s_renderer = renderer;
|
||||
|
||||
#endif
|
||||
}
|
||||
|
|
|
@ -1096,9 +1096,9 @@ REG_SET_END
|
|||
__aligned(struct, 32) GIFPath
|
||||
{
|
||||
GIFTag tag;
|
||||
uint32 reg;
|
||||
uint32 nreg;
|
||||
uint32 nloop;
|
||||
uint32 nreg;
|
||||
uint32 reg;
|
||||
uint32 type;
|
||||
GSVector4i regs;
|
||||
|
||||
|
@ -1106,13 +1106,26 @@ __aligned(struct, 32) GIFPath
|
|||
|
||||
__forceinline void SetTag(const void* mem)
|
||||
{
|
||||
GSVector4i v = GSVector4i::load<false>(mem);
|
||||
GSVector4i::store<true>(&tag, v);
|
||||
const GIFTag* RESTRICT src = (const GIFTag*)mem;
|
||||
|
||||
// the compiler has a hard time not reloading every time a field of src is accessed
|
||||
|
||||
uint32 a = src->u32[0];
|
||||
uint32 b = src->u32[1];
|
||||
|
||||
tag.u32[0] = a;
|
||||
tag.u32[1] = b;
|
||||
|
||||
nloop = a & 0x7fff;
|
||||
|
||||
if(nloop == 0) return;
|
||||
|
||||
GSVector4i v = GSVector4i::loadl(&src->REGS); // REGS not stored to tag.REGS, only into this->regs, restored before saving the state though
|
||||
|
||||
nreg = (b & 0xf0000000) ? (b >> 28) : 16; // src->NREG
|
||||
regs = v.upl8(v >> 4) & GSVector4i::x0f(nreg);
|
||||
reg = 0;
|
||||
nreg = tag.NREG ? tag.NREG : 16;
|
||||
regs = v.uph8(v >> 4) & GSVector4i::x0f(nreg);
|
||||
nloop = tag.NLOOP;
|
||||
|
||||
type = TYPE_UNKNOWN;
|
||||
|
||||
if(tag.FLG == GIF_FLG_PACKED)
|
||||
|
|
|
@ -389,6 +389,8 @@ void GSClut::GetAlphaMinMax32(int& amin, int& amax)
|
|||
|
||||
void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 4 blocks
|
||||
|
||||
for(int i = 0; i < 64; i += 16)
|
||||
{
|
||||
WriteCLUT_T32_I4_CSM1(&src[i + 0], &clut[i * 2 + 0]);
|
||||
|
@ -400,6 +402,8 @@ void GSClut::WriteCLUT_T32_I8_CSM1(const uint32* RESTRICT src, uint16* RESTRICT
|
|||
|
||||
__forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 1 block
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)clut;
|
||||
|
||||
|
@ -420,6 +424,8 @@ __forceinline void GSClut::WriteCLUT_T32_I4_CSM1(const uint32* RESTRICT src, uin
|
|||
|
||||
void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 2 blocks
|
||||
|
||||
GSVector4i* s = (GSVector4i*)src;
|
||||
GSVector4i* d = (GSVector4i*)clut;
|
||||
|
||||
|
@ -443,6 +449,8 @@ void GSClut::WriteCLUT_T16_I8_CSM1(const uint16* RESTRICT src, uint16* RESTRICT
|
|||
|
||||
__forceinline void GSClut::WriteCLUT_T16_I4_CSM1(const uint16* RESTRICT src, uint16* RESTRICT clut)
|
||||
{
|
||||
// 1 block (half)
|
||||
|
||||
for(int i = 0; i < 16; i++)
|
||||
{
|
||||
clut[i] = src[clutTableT16I4[i]];
|
||||
|
|
|
@ -103,6 +103,7 @@ public:
|
|||
virtual void BeginScene() {}
|
||||
virtual void DrawPrimitive() {};
|
||||
virtual void DrawIndexedPrimitive() {}
|
||||
virtual void DrawIndexedPrimitive(int offset, int count) {}
|
||||
virtual void EndScene();
|
||||
|
||||
virtual void ClearRenderTarget(GSTexture* t, const GSVector4& c) {}
|
||||
|
|
|
@ -98,8 +98,6 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, flags, levels, countof(levels), D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx);
|
||||
// hr = D3D11CreateDeviceAndSwapChain(NULL, D3D_DRIVER_TYPE_REFERENCE, NULL, flags, NULL, 0, D3D11_SDK_VERSION, &scd, &m_swapchain, &m_dev, &level, &m_ctx);
|
||||
|
||||
//return false;
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
if(!SetFeatureLevel(level, true))
|
||||
|
@ -360,6 +358,13 @@ void GSDevice11::DrawIndexedPrimitive()
|
|||
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
|
||||
}
|
||||
|
||||
// Issues an indexed draw over a sub-range of the current index data:
// `count` indices, beginning `offset` indices past m_index.start, with
// m_vertex.start passed as the base vertex location.
void GSDevice11::DrawIndexedPrimitive(int offset, int count)
|
||||
{
|
||||
// Sanity check: the requested sub-range must end within m_index.count.
// NOTE(review): negative offset/count are not rejected here - confirm callers never pass them.
ASSERT(offset + count <= m_index.count);
|
||||

||||
m_ctx->DrawIndexed(count, m_index.start + offset, m_vertex.start);
|
||||
}
|
||||
|
||||
void GSDevice11::Dispatch(uint32 x, uint32 y, uint32 z)
|
||||
{
|
||||
m_ctx->Dispatch(x, y, z);
|
||||
|
@ -995,9 +1000,9 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
|
||||
void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
|
||||
{
|
||||
// TODO: if(m_state.cs_srv[i] != srv)
|
||||
if(m_state.cs_srv[i] != srv)
|
||||
{
|
||||
// TODO: m_state.cs_srv[i] = srv;
|
||||
m_state.cs_srv[i] = srv;
|
||||
|
||||
m_ctx->CSSetShaderResources(i, 1, &srv);
|
||||
}
|
||||
|
@ -1005,17 +1010,14 @@ void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
|
|||
|
||||
void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav)
|
||||
{
|
||||
// TODO: if(m_state.cs_uav[i] != uav)
|
||||
{
|
||||
// TODO: m_state.cs_uav[i] = uav;
|
||||
uint32 counters[8];
|
||||
|
||||
memset(counters, 0, sizeof(counters));
|
||||
|
||||
// uint32 count[] = {-1};
|
||||
|
||||
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, NULL);
|
||||
}
|
||||
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, counters);
|
||||
}
|
||||
|
||||
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
|
||||
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb)
|
||||
{
|
||||
if(m_state.cs != cs)
|
||||
{
|
||||
|
@ -1023,6 +1025,13 @@ void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
|
|||
|
||||
m_ctx->CSSetShader(cs, NULL, 0);
|
||||
}
|
||||
|
||||
if(m_state.cs_cb != cs_cb)
|
||||
{
|
||||
m_state.cs_cb = cs_cb;
|
||||
|
||||
m_ctx->CSSetConstantBuffers(0, 1, &cs_cb);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
|
||||
|
@ -1065,8 +1074,6 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
|
|||
m_ctx->OMSetRenderTargets(1, &rtv, dsv);
|
||||
}
|
||||
|
||||
memset(m_state.uav, 0, sizeof(m_state.uav));
|
||||
|
||||
if(m_state.viewport != rt->GetSize())
|
||||
{
|
||||
m_state.viewport = rt->GetSize();
|
||||
|
@ -1095,20 +1102,9 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice11::OMSetRenderTargets(const GSVector2i& rtsize, ID3D11UnorderedAccessView** uav, int count, const GSVector4i* scissor)
|
||||
void GSDevice11::OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor)
|
||||
{
|
||||
for(int i = 0; i < count; i++)
|
||||
{
|
||||
if(m_state.uav[i] != uav[i])
|
||||
{
|
||||
memcpy(m_state.uav, uav, sizeof(uav[0]) * count);
|
||||
memset(m_state.uav + count, 0, sizeof(m_state.uav) - sizeof(uav[0]) * count);
|
||||
|
||||
m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, NULL, NULL, 0, count, uav, NULL);
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
m_ctx->OMSetRenderTargetsAndUnorderedAccessViews(0, NULL, NULL, 0, count, uav, counters);
|
||||
|
||||
m_state.rtv = NULL;
|
||||
m_state.dsv = NULL;
|
||||
|
@ -1286,7 +1282,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MAC
|
|||
|
||||
CComPtr<ID3D11Blob> shader, error;
|
||||
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
|
||||
if(error)
|
||||
{
|
||||
|
|
|
@ -64,7 +64,9 @@ class GSDevice11 : public GSDeviceDX
|
|||
ID3D11PixelShader* ps;
|
||||
ID3D11Buffer* ps_cb;
|
||||
ID3D11SamplerState* ps_ss[3];
|
||||
ID3D11ShaderResourceView* cs_srv[16];
|
||||
ID3D11ComputeShader* cs;
|
||||
ID3D11Buffer* cs_cb;
|
||||
GSVector2i viewport;
|
||||
GSVector4i scissor;
|
||||
ID3D11DepthStencilState* dss;
|
||||
|
@ -73,7 +75,6 @@ class GSDevice11 : public GSDeviceDX
|
|||
float bf;
|
||||
ID3D11RenderTargetView* rtv;
|
||||
ID3D11DepthStencilView* dsv;
|
||||
ID3D11UnorderedAccessView* uav[8];
|
||||
} m_state;
|
||||
|
||||
public: // TODO
|
||||
|
@ -147,6 +148,7 @@ public:
|
|||
|
||||
void DrawPrimitive();
|
||||
void DrawIndexedPrimitive();
|
||||
void DrawIndexedPrimitive(int offset, int count);
|
||||
void Dispatch(uint32 x, uint32 y, uint32 z);
|
||||
|
||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
|
||||
|
@ -186,11 +188,11 @@ public:
|
|||
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL);
|
||||
void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv);
|
||||
void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav);
|
||||
void CSSetShader(ID3D11ComputeShader* cs);
|
||||
void CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb);
|
||||
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
|
||||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||
void OMSetRenderTargets(const GSVector2i& rtsize, ID3D11UnorderedAccessView** uav, int count, const GSVector4i* scissor = NULL);
|
||||
void OMSetRenderTargets(const GSVector2i& rtsize, int count, ID3D11UnorderedAccessView** uav, uint32* counters, const GSVector4i* scissor = NULL);
|
||||
|
||||
void SetupVS(VSSelector sel, const VSConstantBuffer* cb);
|
||||
void SetupGS(GSSelector sel);
|
||||
|
|
|
@ -22,16 +22,28 @@
|
|||
#include "stdafx.h"
|
||||
#include "GSRendererCS.h"
|
||||
|
||||
#define PS_BATCH_SIZE 512
|
||||
|
||||
// Constructs the renderer: sets m_nativeres, clears m_vm_valid and the
// m_texture slots, and allocates the m_output buffer.
GSRendererCS::GSRendererCS()
|
||||
: GSRenderer()
|
||||
{
|
||||
m_nativeres = true;
|
||||

||||
memset(m_vm_valid, 0, sizeof(m_vm_valid));
|
||||

||||
// Null every m_texture slot so that deleting them later is safe even if they were never created.
memset(m_texture, 0, sizeof(m_texture));
|
||||

||||
// 1024 * 1024 uint32-sized elements, 32-byte aligned; released with _aligned_free in the destructor.
// NOTE(review): the allocation result is not checked for NULL - confirm this is acceptable.
m_output = (uint8*)_aligned_malloc(1024 * 1024 * sizeof(uint32), 32);
|
||||
}
|
||||
|
||||
// Destroys the renderer: deletes every m_texture entry and frees m_output.
GSRendererCS::~GSRendererCS()
|
||||
{
|
||||
// Entries may still be NULL (the constructor zeroes the array); delete on NULL is a no-op.
for(int i = 0; i < countof(m_texture); i++)
|
||||
{
|
||||
delete m_texture[i];
|
||||
}
|
||||

||||
// m_output was obtained from _aligned_malloc, so it must be released with _aligned_free.
_aligned_free(m_output);
|
||||
}
|
||||
|
||||
bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
||||
|
@ -47,6 +59,7 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
D3D11_BUFFER_DESC bd;
|
||||
D3D11_TEXTURE2D_DESC td;
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
|
||||
|
||||
D3D_FEATURE_LEVEL level;
|
||||
|
||||
|
@ -59,8 +72,6 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
|
||||
ID3D11DeviceContext* ctx = *dev;
|
||||
|
||||
delete dev->CreateRenderTarget(1024, 1024, false);
|
||||
|
||||
// empty depth stencil state
|
||||
|
||||
memset(&dsd, 0, sizeof(dsd));
|
||||
|
@ -100,6 +111,92 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// link buffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = 256 << 20; // 256 MB w00t
|
||||
bd.StructureByteStride = sizeof(uint32) * 4; // c, z, id, next
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_lb);
|
||||
|
||||
{
|
||||
uint32 data[] = {0, 0, 0xffffffff, 0};
|
||||
|
||||
D3D11_BOX box;
|
||||
memset(&box, 0, sizeof(box));
|
||||
box.right = sizeof(data);
|
||||
box.bottom = 1;
|
||||
box.back = 1;
|
||||
|
||||
ctx->UpdateSubresource(m_lb, 0, &box, data, 0, 0);
|
||||
}
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&uavd, 0, sizeof(uavd));
|
||||
|
||||
uavd.Format = DXGI_FORMAT_UNKNOWN;
|
||||
uavd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride;
|
||||
uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER;
|
||||
uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateUnorderedAccessView(m_lb, &uavd, &m_lb_uav);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_lb, &srvd, &m_lb_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// start offset buffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(uint32) * 2048 * 2048; // index
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
|
||||
bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_sob);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&uavd, 0, sizeof(uavd));
|
||||
|
||||
uavd.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
uavd.Buffer.NumElements = bd.ByteWidth / sizeof(uint32);
|
||||
uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
|
||||
uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;
|
||||
|
||||
hr = (*dev)->CreateUnorderedAccessView(m_sob, &uavd, &m_sob_uav);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_R32_TYPELESS;
|
||||
srvd.BufferEx.NumElements = bd.ByteWidth / sizeof(uint32);
|
||||
srvd.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_sob, &srvd, &m_sob_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
const uint32 tmp = 0;
|
||||
|
||||
ctx->ClearUnorderedAccessViewUint(m_sob_uav, &tmp); // initial clear, next time Draw should restore it in Step 2
|
||||
|
||||
// video memory (4MB)
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
@ -190,6 +287,17 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// PS
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
hr = dev->CompileShader(IDR_CS_FX, "ps_main0", macro, &m_ps0);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
// PSConstantBuffer
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
@ -204,19 +312,19 @@ bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
|
|||
|
||||
//
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = 14 * sizeof(float) * 200000;
|
||||
bd.Usage = D3D11_USAGE_DEFAULT;
|
||||
bd.BindFlags = D3D11_BIND_STREAM_OUTPUT | D3D11_BIND_SHADER_RESOURCE;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &m_sob);
|
||||
|
||||
//
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Deletes every m_texture entry and resets the slot to NULL.
void GSRendererCS::ResetDevice()
|
||||
{
|
||||
for(int i = 0; i < countof(m_texture); i++)
|
||||
{
|
||||
delete m_texture[i];
|
||||

||||
// Clear the slot so the pointer is not deleted a second time (the destructor deletes these too).
m_texture[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererCS::VSync(int field)
|
||||
{
|
||||
__super::VSync(field);
|
||||
|
@ -228,7 +336,40 @@ GSTexture* GSRendererCS::GetOutput(int i)
|
|||
{
|
||||
// TODO: create a compute shader which unswizzles the frame from m_vm to the output texture
|
||||
|
||||
return NULL;
|
||||
const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;
|
||||
|
||||
int w = DISPFB.FBW * 64;
|
||||
int h = GetFrameRect(i).bottom;
|
||||
|
||||
// TODO: round up bottom
|
||||
|
||||
if(m_dev->ResizeTexture(&m_texture[i], w, h))
|
||||
{
|
||||
const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM];
|
||||
|
||||
GSVector4i r(0, 0, w, h);
|
||||
GSVector4i r2 = r.ralign<Align_Outside>(psm.bs);
|
||||
|
||||
GSOffset* o = m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM);
|
||||
|
||||
Read(o, r2, false);
|
||||
|
||||
(m_mem.*psm.rtx)(o, r2, m_output, 1024 * 4, m_env.TEXA);
|
||||
|
||||
m_texture[i]->Update(r, m_output, 1024 * 4);
|
||||
|
||||
if(s_dump)
|
||||
{
|
||||
if(s_save && s_n >= s_saven)
|
||||
{
|
||||
m_texture[i]->Save(format("c:\\temp1\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), (int)DISPFB.PSM));
|
||||
}
|
||||
|
||||
s_n++;
|
||||
}
|
||||
}
|
||||
|
||||
return m_texture[i];
|
||||
}
|
||||
|
||||
void GSRendererCS::Draw()
|
||||
|
@ -256,7 +397,9 @@ void GSRendererCS::Draw()
|
|||
Write(context->offset.zb, r);
|
||||
|
||||
// TODO: m_tc->InvalidateVideoMem(context->offset.zb, r, false);
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: if(24-bit) fm/zm |= 0xff000000;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
|
@ -281,16 +424,20 @@ void GSRendererCS::Draw()
|
|||
|
||||
ID3D11DeviceContext* ctx = *dev;
|
||||
|
||||
//
|
||||
|
||||
dev->BeginScene();
|
||||
|
||||
// SetupOM
|
||||
|
||||
ID3D11UnorderedAccessView* uavs[] = {m_vm_uav};
|
||||
|
||||
dev->OMSetDepthStencilState(m_dss, 0);
|
||||
dev->OMSetBlendState(m_bs, 0);
|
||||
dev->OMSetRenderTargets(rtsize, uavs, countof(uavs), &scissor);
|
||||
|
||||
ID3D11UnorderedAccessView* uavs[] = {m_vm_uav, m_lb_uav, m_sob_uav};
|
||||
uint32 counters[] = {1, 0, 0};
|
||||
|
||||
dev->OMSetRenderTargets(rtsize, countof(uavs), uavs, counters, &scissor);
|
||||
|
||||
// SetupIA
|
||||
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology;
|
||||
|
@ -311,9 +458,18 @@ void GSRendererCS::Draw()
|
|||
__assume(0);
|
||||
}
|
||||
|
||||
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next);
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail);
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
GSVector4i r2 = bbox.add32(GSVector4i(-1, -1, 1, 1)).rintersect(scissor);
|
||||
|
||||
m_vertex.buff[m_vertex.next + 0].XYZ.X = context->XYOFFSET.OFX + (r2.left << 4);
|
||||
m_vertex.buff[m_vertex.next + 0].XYZ.Y = context->XYOFFSET.OFY + (r2.top << 4);
|
||||
m_vertex.buff[m_vertex.next + 1].XYZ.X = context->XYOFFSET.OFX + (r2.right << 4);
|
||||
m_vertex.buff[m_vertex.next + 1].XYZ.Y = context->XYOFFSET.OFY + (r2.bottom << 4);
|
||||
|
||||
m_index.buff[m_index.tail + 0] = m_vertex.next + 0;
|
||||
m_index.buff[m_index.tail + 1] = m_vertex.next + 1;
|
||||
|
||||
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next + 2);
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail + 2);
|
||||
|
||||
// SetupVS
|
||||
|
||||
|
@ -337,9 +493,15 @@ void GSRendererCS::Draw()
|
|||
//vs_cb.VertexOffset = GSVector4(ox * sx, oy * sy, 0.0f, -1.0f);
|
||||
|
||||
{
|
||||
hash_map<uint32, GSVertexShader11 >::const_iterator i = m_vs.find(vs_sel);
|
||||
GSVertexShader11 vs;
|
||||
|
||||
if(i == m_vs.end())
|
||||
hash_map<uint32, GSVertexShader11>::const_iterator i = m_vs.find(vs_sel);
|
||||
|
||||
if(i != m_vs.end())
|
||||
{
|
||||
vs = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
string str[2];
|
||||
|
||||
|
@ -364,20 +526,16 @@ void GSRendererCS::Draw()
|
|||
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
GSVertexShader11 vs;
|
||||
|
||||
dev->CompileShader(IDR_CS_FX, "vs_main", macro, &vs.vs, layout, countof(layout), &vs.il);
|
||||
|
||||
m_vs[vs_sel] = vs;
|
||||
|
||||
i = m_vs.find(vs_sel);
|
||||
}
|
||||
|
||||
ctx->UpdateSubresource(m_vs_cb, 0, NULL, &vs_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
dev->VSSetShader(i->second.vs, m_vs_cb);
|
||||
dev->VSSetShader(vs.vs, m_vs_cb);
|
||||
|
||||
dev->IASetInputLayout(i->second.il);
|
||||
dev->IASetInputLayout(vs.il);
|
||||
}
|
||||
|
||||
// SetupGS
|
||||
|
@ -385,23 +543,25 @@ void GSRendererCS::Draw()
|
|||
GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
gs_sel.prim = m_vt.m_primclass;
|
||||
|
||||
CComPtr<ID3D11GeometryShader> gs;
|
||||
CComPtr<ID3D11GeometryShader> gs[2];
|
||||
|
||||
for(int j = 0; j < 2; j++)
|
||||
{
|
||||
gs_sel.prim = j == 0 ? m_vt.m_primclass : GS_SPRITE_CLASS;
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11GeometryShader> >::const_iterator i = m_gs.find(gs_sel);
|
||||
|
||||
if(i != m_gs.end())
|
||||
{
|
||||
gs = i->second;
|
||||
gs[j] = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
string str[2];
|
||||
|
||||
str[0] = format("%d", gs_sel.iip);
|
||||
str[1] = format("%d", gs_sel.prim);
|
||||
str[1] = format("%d", j == 0 ? gs_sel.prim : GS_SPRITE_CLASS);
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
|
@ -409,140 +569,111 @@ void GSRendererCS::Draw()
|
|||
{"GS_PRIM", str[1].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
/*
|
||||
D3D11_SO_DECLARATION_ENTRY layout[] =
|
||||
{
|
||||
{0, "SV_Position", 0, 0, 4, 0},
|
||||
{0, "TEXCOORD", 0, 0, 2, 0},
|
||||
{0, "TEXCOORD", 1, 0, 4, 0},
|
||||
{0, "COLOR", 0, 0, 4, 0},
|
||||
};
|
||||
*/
|
||||
dev->CompileShader(IDR_CS_FX, "gs_main", macro, &gs);//, layout, countof(layout));
|
||||
|
||||
m_gs[gs_sel] = gs;
|
||||
dev->CompileShader(IDR_CS_FX, "gs_main", macro, &gs[j]);
|
||||
|
||||
m_gs[gs_sel] = gs[j];
|
||||
}
|
||||
}
|
||||
|
||||
dev->GSSetShader(gs);
|
||||
|
||||
// SetupPS
|
||||
|
||||
PSSelector ps_sel;
|
||||
PSConstantBuffer ps_cb;
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps.find(ps_sel);
|
||||
|
||||
if(i == m_ps.end())
|
||||
{
|
||||
string str[15];
|
||||
|
||||
str[0] = format("%d", 0);
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"PS_TODO", str[0].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
CComPtr<ID3D11PixelShader> ps;
|
||||
|
||||
dev->CompileShader(IDR_CS_FX, "ps_main", macro, &ps);
|
||||
|
||||
m_ps[ps_sel] = ps;
|
||||
|
||||
i = m_ps.find(ps_sel);
|
||||
}
|
||||
|
||||
ctx->UpdateSubresource(m_ps_cb, 0, NULL, &ps_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
dev->PSSetSamplerState(m_ss, NULL, NULL);
|
||||
|
||||
dev->PSSetShader(i->second, m_ps_cb);
|
||||
PSSelector ps_sel;
|
||||
|
||||
// Offset
|
||||
ps_sel.fpsm = context->FRAME.PSM;
|
||||
ps_sel.zpsm = context->ZBUF.PSM;
|
||||
|
||||
CComPtr<ID3D11PixelShader> ps[2] = {m_ps0, NULL};
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11PixelShader> >::const_iterator i = m_ps1.find(ps_sel);
|
||||
|
||||
if(i != m_ps1.end())
|
||||
{
|
||||
ps[1] = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
string str[15];
|
||||
|
||||
str[0] = format("%d", PS_BATCH_SIZE);
|
||||
str[1] = format("%d", context->FRAME.PSM);
|
||||
str[2] = format("%d", context->ZBUF.PSM);
|
||||
|
||||
D3D11_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"PS_BATCH_SIZE", str[0].c_str()},
|
||||
{"PS_FPSM", str[1].c_str()},
|
||||
{"PS_ZPSM", str[2].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
dev->CompileShader(IDR_CS_FX, "ps_main1", macro, &ps[1]);
|
||||
|
||||
m_ps1[ps_sel] = ps[1];
|
||||
}
|
||||
|
||||
PSConstantBuffer ps_cb;
|
||||
|
||||
ps_cb.fm = fm;
|
||||
ps_cb.zm = zm;
|
||||
|
||||
ctx->UpdateSubresource(m_ps_cb, 0, NULL, &ps_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
OffsetBuffer* fzbo = NULL;
|
||||
|
||||
GetOffsetBuffer(&fzbo);
|
||||
|
||||
dev->PSSetShaderResourceView(0, fzbo->row_view);
|
||||
dev->PSSetShaderResourceView(1, fzbo->col_view);
|
||||
dev->PSSetShaderResourceView(0, fzbo->row_srv);
|
||||
dev->PSSetShaderResourceView(1, fzbo->col_srv);
|
||||
// TODO: palette, texture
|
||||
|
||||
// TODO: 2 palette
|
||||
// TODO: 3, 4, ... texture levels
|
||||
int step = PS_BATCH_SIZE * GSUtil::GetVertexCount(PRIM->PRIM);
|
||||
|
||||
//ID3D11Buffer* tmp[] = {m_sob};
|
||||
for(int i = 0; i < m_index.tail; i += step)
|
||||
{
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
dev->GSSetShader(gs[0]);
|
||||
dev->PSSetShader(ps[0], m_ps_cb);
|
||||
dev->DrawIndexedPrimitive(i, std::min<int>(m_index.tail - i, step));
|
||||
|
||||
//ctx->SOSetTargets(countof(tmp), tmp, NULL);
|
||||
dev->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST);
|
||||
dev->GSSetShader(gs[1]);
|
||||
dev->PSSetShader(ps[1], m_ps_cb);
|
||||
dev->DrawIndexedPrimitive(m_index.tail, 2);
|
||||
|
||||
dev->DrawIndexedPrimitive();
|
||||
//printf("%d/%d, %d %d %d %d\n", i, m_index.tail, r2.x, r2.y, r2.z, r2.w);
|
||||
}
|
||||
|
||||
//ctx->SOSetTargets(0, NULL, NULL);
|
||||
dev->EndScene();
|
||||
|
||||
if(0)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
D3D11_BUFFER_DESC bd;
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = 14 * sizeof(float) * 200000;
|
||||
bd.Usage = D3D11_USAGE_STAGING;
|
||||
bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
|
||||
|
||||
CComPtr<ID3D11Buffer> sob;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, NULL, &sob);
|
||||
|
||||
ctx->CopyResource(sob, m_sob);
|
||||
|
||||
D3D11_MAPPED_SUBRESOURCE map;
|
||||
|
||||
if(SUCCEEDED(ctx->Map(sob, 0, D3D11_MAP_READ, 0, &map)))
|
||||
{
|
||||
float* f = (float*)map.pData;
|
||||
|
||||
for(int i = 0; i < 12; i++, f += 14)
|
||||
printf("%f %f %f %f\n%f %f\n%f %f %f %f\n%f %f %f %f\n",
|
||||
f[0], f[1], f[2], f[3],
|
||||
f[4], f[5],
|
||||
f[6], f[7], f[8], f[9],
|
||||
f[10], f[11], f[12], f[13]);
|
||||
|
||||
ctx->Unmap(sob, 0);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
if(1)
|
||||
{
|
||||
//Read(m_mem.GetOffset(0, 16, PSM_PSMCT32), GSVector4i(0, 0, 1024, 1024), false);
|
||||
|
||||
std::string s;
|
||||
/*
|
||||
s = format("c:\\temp1\\_%05d_f%lld_fb0_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0);
|
||||
m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
Read(m_mem.GetOffset(0, 16, PSM_PSMCT32), GSVector4i(0, 0, 1024, 1024), false);
|
||||
*/
|
||||
//
|
||||
if(fm != 0xffffffff) Read(context->offset.fb, r, false);
|
||||
//
|
||||
if(zm != 0xffffffff) Read(context->offset.zb, r, false);
|
||||
|
||||
std::string s;
|
||||
|
||||
s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
|
||||
|
||||
//
|
||||
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
|
||||
|
||||
s = format("c:\\temp1\\_%05d_f%lld_zt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
|
||||
|
||||
//
|
||||
m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512);
|
||||
|
||||
//m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
/*
|
||||
s = format("c:\\temp1\\_%05d_f%lld_fb1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0);
|
||||
m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
*/
|
||||
|
||||
s_n++;
|
||||
}
|
||||
|
||||
dev->EndScene();
|
||||
}
|
||||
|
||||
void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
|
@ -715,11 +846,11 @@ bool GSRendererCS::GetOffsetBuffer(OffsetBuffer** fzbo)
|
|||
srvd.Buffer.FirstElement = 0;
|
||||
srvd.Buffer.NumElements = 2048;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(ob.row, &srvd, &ob.row_view);
|
||||
hr = (*dev)->CreateShaderResourceView(ob.row, &srvd, &ob.row_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(ob.col, &srvd, &ob.col_view);
|
||||
hr = (*dev)->CreateShaderResourceView(ob.col, &srvd, &ob.col_srv);
|
||||
|
||||
if(FAILED(hr)) return false;
|
||||
|
||||
|
|
|
@ -74,25 +74,33 @@ class GSRendererCS : public GSRenderer
|
|||
{
|
||||
struct
|
||||
{
|
||||
uint32 TODO:1;
|
||||
uint32 fpsm:6;
|
||||
uint32 zpsm:6;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
operator uint32() {return key & 0x1;}
|
||||
operator uint32() {return key & 0x3ff;}
|
||||
|
||||
PSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
__aligned(struct, 32) PSConstantBuffer
|
||||
{
|
||||
GSVector4 TODO;
|
||||
uint32 fm;
|
||||
uint32 zm;
|
||||
};
|
||||
|
||||
CComPtr<ID3D11DepthStencilState> m_dss;
|
||||
CComPtr<ID3D11BlendState> m_bs;
|
||||
CComPtr<ID3D11SamplerState> m_ss;
|
||||
CComPtr<ID3D11Buffer> m_lb;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_lb_uav;
|
||||
CComPtr<ID3D11ShaderResourceView> m_lb_srv;
|
||||
CComPtr<ID3D11Buffer> m_sob;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_sob_uav;
|
||||
CComPtr<ID3D11ShaderResourceView> m_sob_srv;
|
||||
CComPtr<ID3D11Buffer> m_vm;
|
||||
//CComPtr<ID3D11Texture2D> m_vm;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_vm_uav;
|
||||
|
@ -102,9 +110,9 @@ class GSRendererCS : public GSRenderer
|
|||
hash_map<uint32, GSVertexShader11 > m_vs;
|
||||
CComPtr<ID3D11Buffer> m_vs_cb;
|
||||
hash_map<uint32, CComPtr<ID3D11GeometryShader> > m_gs;
|
||||
hash_map<uint32, CComPtr<ID3D11PixelShader> > m_ps;
|
||||
CComPtr<ID3D11PixelShader> m_ps0;
|
||||
hash_map<uint32, CComPtr<ID3D11PixelShader> > m_ps1;
|
||||
CComPtr<ID3D11Buffer> m_ps_cb;
|
||||
CComPtr<ID3D11Buffer> m_sob;
|
||||
|
||||
void Write(GSOffset* o, const GSVector4i& r);
|
||||
void Read(GSOffset* o, const GSVector4i& r, bool invalidate);
|
||||
|
@ -112,7 +120,7 @@ class GSRendererCS : public GSRenderer
|
|||
struct OffsetBuffer
|
||||
{
|
||||
CComPtr<ID3D11Buffer> row, col;
|
||||
CComPtr<ID3D11ShaderResourceView> row_view, col_view;
|
||||
CComPtr<ID3D11ShaderResourceView> row_srv, col_srv;
|
||||
};
|
||||
|
||||
hash_map<uint32, OffsetBuffer> m_offset;
|
||||
|
@ -120,7 +128,11 @@ class GSRendererCS : public GSRenderer
|
|||
bool GetOffsetBuffer(OffsetBuffer** fzbo);
|
||||
|
||||
protected:
|
||||
GSTexture* m_texture[2];
|
||||
uint8* m_output;
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
void ResetDevice();
|
||||
void VSync(int field);
|
||||
GSTexture* GetOutput(int i);
|
||||
void Draw();
|
||||
|
|
|
@ -490,6 +490,8 @@ void GSRendererSW::Sync(int reason)
|
|||
|
||||
void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
if(LOG) {fprintf(s_fp, "w %05x %d %d, %d %d %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM, r.x, r.y, r.z, r.w); fflush(s_fp);}
|
||||
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
o->GetPages(r, m_tmp_pages);
|
||||
|
@ -498,8 +500,6 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
|
||||
if(!m_rl->IsSynced())
|
||||
{
|
||||
if(LOG) {fprintf(s_fp, "w %05x %d %d\n", BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM); fflush(s_fp);}
|
||||
|
||||
for(uint32* RESTRICT p = m_tmp_pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
if(m_fzb_pages[*p] | m_tex_pages[*p])
|
||||
|
@ -516,10 +516,10 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
|
||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
if(LOG) {fprintf(s_fp, "r %05x %d %d, %d %d %d %d\n", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM, r.x, r.y, r.z, r.w); fflush(s_fp);}
|
||||
|
||||
if(!m_rl->IsSynced())
|
||||
{
|
||||
if(LOG) {fprintf(s_fp, "r %05x %d %d\n", BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM); fflush(s_fp);}
|
||||
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||
|
||||
o->GetPages(r, m_tmp_pages);
|
||||
|
@ -814,6 +814,8 @@ bool GSRendererSW::CheckSourcePages(SharedData* sd)
|
|||
|
||||
if(m_fzb_pages[*p]) // currently being drawn to? => sync
|
||||
{
|
||||
if(LOG) fprintf(s_fp, "r=8 %05x\n", *p << 5);
|
||||
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
@ -1395,20 +1397,19 @@ void GSRendererSW::SharedData::UseSourcePages()
|
|||
}
|
||||
|
||||
// TODO
|
||||
/*
|
||||
if(s_dump)
|
||||
|
||||
if(m_parent->s_dump)
|
||||
{
|
||||
uint64 frame = m_perfmon.GetFrame();
|
||||
uint64 frame = m_parent->m_perfmon.GetFrame();
|
||||
|
||||
string s;
|
||||
|
||||
if(s_save && s_n >= s_saven)
|
||||
if(m_parent->s_save && m_parent->s_n >= m_parent->s_saven)
|
||||
{
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", s_n, frame, i, (int)m_context->TEX0.TBP0, (int)m_context->TEX0.PSM);
|
||||
s = format("c:\\temp1\\_%05d_f%lld_tex%d_%05x_%d.bmp", m_parent->s_n, frame, i, (int)m_parent->m_context->TEX0.TBP0, (int)m_parent->m_context->TEX0.PSM);
|
||||
|
||||
sd->m_tex[i].t->Save(s);
|
||||
m_tex[i].t->Save(s);
|
||||
}
|
||||
}
|
||||
*/
|
||||
}
|
||||
}
|
||||
|
|
|
@ -587,13 +587,13 @@ void GSState::GIFRegHandlerNull(const GIFReg* RESTRICT r)
|
|||
// ASSERT(0);
|
||||
}
|
||||
|
||||
__forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
||||
__forceinline void GSState::ApplyPRIM(uint32 prim)
|
||||
{
|
||||
// ASSERT(r->PRIM.PRIM < 7);
|
||||
|
||||
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim.PRIM)) // NOTE: assume strips/fans are converted to lists
|
||||
if(GSUtil::GetPrimClass(m_env.PRIM.PRIM) == GSUtil::GetPrimClass(prim & 7)) // NOTE: assume strips/fans are converted to lists
|
||||
{
|
||||
if((m_env.PRIM.u32[0] ^ prim.u32[0]) & 0x7f8) // all fields except PRIM
|
||||
if((m_env.PRIM.u32[0] ^ prim) & 0x7f8) // all fields except PRIM
|
||||
{
|
||||
Flush();
|
||||
}
|
||||
|
@ -603,8 +603,8 @@ __forceinline void GSState::ApplyPRIM(const GIFRegPRIM& prim)
|
|||
Flush();
|
||||
}
|
||||
|
||||
m_env.PRIM = (GSVector4i)prim;
|
||||
m_env.PRMODE._PRIM = prim.PRIM;
|
||||
m_env.PRIM.u32[0] = prim;
|
||||
m_env.PRMODE._PRIM = prim;
|
||||
|
||||
UpdateContext();
|
||||
|
||||
|
@ -624,7 +624,7 @@ void GSState::GIFRegHandlerPRIM(const GIFReg* RESTRICT r)
|
|||
{
|
||||
ALIGN_STACK(32);
|
||||
|
||||
ApplyPRIM(r->PRIM);
|
||||
ApplyPRIM(r->PRIM.u32[0]);
|
||||
}
|
||||
|
||||
void GSState::GIFRegHandlerRGBAQ(const GIFReg* RESTRICT r)
|
||||
|
@ -715,19 +715,49 @@ template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
|||
if(wt)
|
||||
{
|
||||
GIFRegBITBLTBUF BITBLTBUF;
|
||||
|
||||
BITBLTBUF.SBP = TEX0.CBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
r.left = 0;
|
||||
r.top = 0;
|
||||
r.right = GSLocalMemory::m_psm[TEX0.CPSM].pgs.x;
|
||||
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].pgs.y;
|
||||
if(TEX0.CSM == 0)
|
||||
{
|
||||
BITBLTBUF.SBP = TEX0.CBP;
|
||||
BITBLTBUF.SBW = 1;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
|
||||
r.left = 0;
|
||||
r.top = 0;
|
||||
r.right = GSLocalMemory::m_psm[TEX0.CPSM].bs.x;
|
||||
r.bottom = GSLocalMemory::m_psm[TEX0.CPSM].bs.y;
|
||||
|
||||
int blocks = 4;
|
||||
|
||||
if(GSLocalMemory::m_psm[TEX0.CPSM].bpp == 16)
|
||||
{
|
||||
blocks >>= 1;
|
||||
}
|
||||
|
||||
if(GSLocalMemory::m_psm[TEX0.PSM].bpp == 4)
|
||||
{
|
||||
blocks >>= 1;
|
||||
}
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, r, true);
|
||||
for(int i = 0; i < blocks; i++, BITBLTBUF.SBP++)
|
||||
{
|
||||
InvalidateLocalMem(BITBLTBUF, r, true);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
BITBLTBUF.SBP = TEX0.CBP;
|
||||
BITBLTBUF.SBW = m_env.TEXCLUT.CBW;
|
||||
BITBLTBUF.SPSM = TEX0.CSM;
|
||||
|
||||
r.left = m_env.TEXCLUT.COU;
|
||||
r.top = m_env.TEXCLUT.COV;
|
||||
r.right = r.left + GSLocalMemory::m_psm[TEX0.CPSM].pal;
|
||||
r.bottom = r.top + 1;
|
||||
|
||||
InvalidateLocalMem(BITBLTBUF, r, true);
|
||||
}
|
||||
|
||||
m_mem.m_clut.Write(m_env.CTXT[i].TEX0, m_env.TEXCLUT);
|
||||
}
|
||||
|
@ -1694,10 +1724,11 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
|
||||
if(0)
|
||||
{
|
||||
GIFTag* t = (GIFTag*)mem;
|
||||
uint64 hash;
|
||||
if(path.tag.NREG < 8) hash = path.tag.u32[2] & ((1 << path.tag.NREG * 4) - 1);
|
||||
else if(path.tag.NREG < 16) {hash = path.tag.u32[2]; ((uint32*)&hash)[1] = path.tag.u32[3] & ((1 << (path.tag.NREG - 8) * 4) - 1);}
|
||||
else hash = path.tag.u64[1];
|
||||
if(t->NREG < 8) hash = t->u32[2] & ((1 << t->NREG * 4) - 1);
|
||||
else if(t->NREG < 16) {hash = t->u32[2]; ((uint32*)&hash)[1] = t->u32[3] & ((1 << (t->NREG - 8) * 4) - 1);}
|
||||
else hash = t->u64[1];
|
||||
s_tags[hash] += path.nloop * path.nreg;
|
||||
}
|
||||
|
||||
|
@ -1712,9 +1743,7 @@ template<int index> void GSState::Transfer(const uint8* mem, uint32 size)
|
|||
|
||||
if(path.tag.PRE && path.tag.FLG == GIF_FLG_PACKED)
|
||||
{
|
||||
GIFRegPRIM r;
|
||||
r.u64 = path.tag.PRIM;
|
||||
ApplyPRIM(r);
|
||||
ApplyPRIM(path.tag.PRIM);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -2002,6 +2031,12 @@ int GSState::Freeze(GSFreezeData* fd, bool sizeonly)
|
|||
{
|
||||
m_path[i].tag.NREG = m_path[i].nreg;
|
||||
m_path[i].tag.NLOOP = m_path[i].nloop;
|
||||
m_path[i].tag.REGS = 0;
|
||||
|
||||
for(size_t j = 0; j < countof(m_path[i].regs.u8); j++)
|
||||
{
|
||||
m_path[i].tag.u32[2 + (j >> 3)] |= m_path[i].regs.u8[j] << ((j & 7) << 2);
|
||||
}
|
||||
|
||||
WriteState(data, &m_path[i].tag);
|
||||
WriteState(data, &m_path[i].reg);
|
||||
|
|
|
@ -70,7 +70,7 @@ class GSState : public GSAlignedClass<32>
|
|||
void GIFPackedRegHandlerNOP(const GIFPackedReg* RESTRICT r, uint32 size);
|
||||
|
||||
template<int i> void ApplyTEX0(GIFRegTEX0& TEX0);
|
||||
void ApplyPRIM(const GIFRegPRIM& PRIM);
|
||||
void ApplyPRIM(uint32 prim);
|
||||
|
||||
void GIFRegHandlerNull(const GIFReg* RESTRICT r);
|
||||
void GIFRegHandlerPRIM(const GIFReg* RESTRICT r);
|
||||
|
|
|
@ -115,8 +115,6 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
{
|
||||
const GSDrawingContext* context = m_state->m_context;
|
||||
|
||||
bool sprite = primclass == GS_SPRITE_CLASS;
|
||||
|
||||
int n = 1;
|
||||
|
||||
switch(primclass)
|
||||
|
@ -154,24 +152,11 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
|
||||
for(int i = 0; i < count; i += n)
|
||||
{
|
||||
GSVector4 q;
|
||||
GSVector4i f;
|
||||
|
||||
if(sprite)
|
||||
if(primclass == GS_POINT_CLASS)
|
||||
{
|
||||
if(tme && !fst)
|
||||
{
|
||||
q = GSVector4::load<true>(&v[index[i + 1]]).wwww();
|
||||
}
|
||||
GSVector4i c(v[index[i]].m[0]);
|
||||
|
||||
f = GSVector4i(v[index[i + 1]].m[1]).wwww();
|
||||
}
|
||||
|
||||
for(int j = 0; j < n; j++)
|
||||
{
|
||||
GSVector4i c(v[index[i + j]].m[0]);
|
||||
|
||||
if(color && (iip || j == n - 1)) // TODO: unroll, to avoid j == n - 1
|
||||
if(color)
|
||||
{
|
||||
cmin = cmin.min_u8(c);
|
||||
cmax = cmax.max_u8(c);
|
||||
|
@ -183,16 +168,16 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
{
|
||||
GSVector4 stq = GSVector4::cast(c);
|
||||
|
||||
GSVector4 q2 = !sprite ? stq.wwww() : q;
|
||||
GSVector4 q = stq.wwww();
|
||||
|
||||
stq = (stq.xyww() * q2.rcpnr()).xyww(q2);
|
||||
stq = (stq.xyww() * q.rcpnr()).xyww(q);
|
||||
|
||||
tmin = tmin.min(stq);
|
||||
tmax = tmax.max(stq);
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv(v[index[i + j]].m[1]);
|
||||
GSVector4i uv(v[index[i]].m[1]);
|
||||
|
||||
GSVector4 st = GSVector4(uv.uph16()).xyxy();
|
||||
|
||||
|
@ -201,29 +186,265 @@ void GSVertexTrace::FindMinMax(const void* vertex, const uint32* index, int coun
|
|||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf(v[index[i + j]].m[1]);
|
||||
GSVector4i xyzf(v[index[i]].m[1]);
|
||||
|
||||
GSVector4i xy = xyzf.upl16();
|
||||
GSVector4i z = xyzf.yyyy().srl32(1);
|
||||
GSVector4i z = xyzf.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p = xy.blend16<0xf0>(z.uph32(!sprite ? xyzf : f));
|
||||
GSVector4i p = xy.blend16<0xf0>(z.uph32(xyzf));
|
||||
|
||||
pmin = pmin.min_u32(p);
|
||||
pmax = pmax.max_u32(p);
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p = GSVector4(xy.upl64(z.upl32(!sprite ? xyzf.wwww() : f)));
|
||||
GSVector4 p = GSVector4(xy.upl64(z.srl32(1).upl32(xyzf.wwww())));
|
||||
|
||||
pmin = pmin.min(p);
|
||||
pmax = pmax.max(p);
|
||||
|
||||
#endif
|
||||
}
|
||||
else if(primclass == GS_LINE_CLASS)
|
||||
{
|
||||
GSVector4i c0(v[index[i + 0]].m[0]);
|
||||
GSVector4i c1(v[index[i + 1]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
if(iip)
|
||||
{
|
||||
cmin = cmin.min_u8(c0.min_u8(c1));
|
||||
cmax = cmax.max_u8(c0.max_u8(c1));
|
||||
}
|
||||
else
|
||||
{
|
||||
cmin = cmin.min_u8(c1);
|
||||
cmax = cmax.max_u8(c1);
|
||||
}
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
|
||||
GSVector4 q = stq0.wwww(stq1).rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.zzzz()).xyww(stq1);
|
||||
|
||||
tmin = tmin.min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq0.max(stq1));
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv0(v[index[i + 0]].m[1]);
|
||||
GSVector4i uv1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
|
||||
GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st0.min(st1));
|
||||
tmax = tmax.max(st0.max(st1));
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf0(v[index[i + 0]].m[1]);
|
||||
GSVector4i xyzf1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4i xy0 = xyzf0.upl16();
|
||||
GSVector4i z0 = xyzf0.yyyy();
|
||||
GSVector4i xy1 = xyzf1.upl16();
|
||||
GSVector4i z1 = xyzf1.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
|
||||
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
|
||||
|
||||
pmin = pmin.min_u32(p0.min_u32(p1));
|
||||
pmax = pmax.max_u32(p0.max_u32(p1));
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
|
||||
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
|
||||
|
||||
pmin = pmin.min(p0.min(p1));
|
||||
pmax = pmax.max(p0.max(p1));
|
||||
|
||||
#endif
|
||||
}
|
||||
else if(primclass == GS_TRIANGLE_CLASS)
|
||||
{
|
||||
GSVector4i c0(v[index[i + 0]].m[0]);
|
||||
GSVector4i c1(v[index[i + 1]].m[0]);
|
||||
GSVector4i c2(v[index[i + 2]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
if(iip)
|
||||
{
|
||||
cmin = cmin.min_u8(c2).min_u8(c0.min_u8(c1));
|
||||
cmax = cmax.max_u8(c2).max_u8(c0.max_u8(c1));
|
||||
}
|
||||
else
|
||||
{
|
||||
cmin = cmin.min_u8(c2);
|
||||
cmax = cmax.max_u8(c2);
|
||||
}
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
GSVector4 stq2 = GSVector4::cast(c2);
|
||||
|
||||
GSVector4 q = stq0.wwww(stq1).xzww(stq2).rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q.xxxx()).xyww(stq0);
|
||||
stq1 = (stq1.xyww() * q.yyyy()).xyww(stq1);
|
||||
stq2 = (stq2.xyww() * q.zzzz()).xyww(stq2);
|
||||
|
||||
tmin = tmin.min(stq2).min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq2).max(stq0.max(stq1));
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv0(v[index[i + 0]].m[1]);
|
||||
GSVector4i uv1(v[index[i + 1]].m[1]);
|
||||
GSVector4i uv2(v[index[i + 2]].m[1]);
|
||||
|
||||
GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
|
||||
GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
|
||||
GSVector4 st2 = GSVector4(uv2.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st2).min(st0.min(st1));
|
||||
tmax = tmax.max(st2).max(st0.max(st1));
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf0(v[index[i + 0]].m[1]);
|
||||
GSVector4i xyzf1(v[index[i + 1]].m[1]);
|
||||
GSVector4i xyzf2(v[index[i + 2]].m[1]);
|
||||
|
||||
GSVector4i xy0 = xyzf0.upl16();
|
||||
GSVector4i z0 = xyzf0.yyyy();
|
||||
GSVector4i xy1 = xyzf1.upl16();
|
||||
GSVector4i z1 = xyzf1.yyyy();
|
||||
GSVector4i xy2 = xyzf2.upl16();
|
||||
GSVector4i z2 = xyzf2.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf0));
|
||||
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
|
||||
GSVector4i p2 = xy2.blend16<0xf0>(z2.uph32(xyzf2));
|
||||
|
||||
pmin = pmin.min_u32(p2).min_u32(p0.min_u32(p1));
|
||||
pmax = pmax.max_u32(p2).max_u32(p0.max_u32(p1));
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf0.wwww())));
|
||||
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
|
||||
GSVector4 p2 = GSVector4(xy2.upl64(z2.srl32(1).upl32(xyzf2.wwww())));
|
||||
|
||||
pmin = pmin.min(p2).min(p0.min(p1));
|
||||
pmax = pmax.max(p2).max(p0.max(p1));
|
||||
|
||||
#endif
|
||||
}
|
||||
else if(primclass == GS_SPRITE_CLASS)
|
||||
{
|
||||
GSVector4i c0(v[index[i + 0]].m[0]);
|
||||
GSVector4i c1(v[index[i + 1]].m[0]);
|
||||
|
||||
if(color)
|
||||
{
|
||||
if(iip)
|
||||
{
|
||||
cmin = cmin.min_u8(c0.min_u8(c1));
|
||||
cmax = cmax.max_u8(c0.max_u8(c1));
|
||||
}
|
||||
else
|
||||
{
|
||||
cmin = cmin.min_u8(c1);
|
||||
cmax = cmax.max_u8(c1);
|
||||
}
|
||||
}
|
||||
|
||||
if(tme)
|
||||
{
|
||||
if(!fst)
|
||||
{
|
||||
GSVector4 stq0 = GSVector4::cast(c0);
|
||||
GSVector4 stq1 = GSVector4::cast(c1);
|
||||
|
||||
GSVector4 q = stq1.wwww().rcpnr();
|
||||
|
||||
stq0 = (stq0.xyww() * q).xyww(stq1);
|
||||
stq1 = (stq1.xyww() * q).xyww(stq1);
|
||||
|
||||
tmin = tmin.min(stq0.min(stq1));
|
||||
tmax = tmax.max(stq0.max(stq1));
|
||||
}
|
||||
else
|
||||
{
|
||||
GSVector4i uv0(v[index[i + 0]].m[1]);
|
||||
GSVector4i uv1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4 st0 = GSVector4(uv0.uph16()).xyxy();
|
||||
GSVector4 st1 = GSVector4(uv1.uph16()).xyxy();
|
||||
|
||||
tmin = tmin.min(st0.min(st1));
|
||||
tmax = tmax.max(st0.max(st1));
|
||||
}
|
||||
}
|
||||
|
||||
GSVector4i xyzf0(v[index[i + 0]].m[1]);
|
||||
GSVector4i xyzf1(v[index[i + 1]].m[1]);
|
||||
|
||||
GSVector4i xy0 = xyzf0.upl16();
|
||||
GSVector4i z0 = xyzf0.yyyy();
|
||||
GSVector4i xy1 = xyzf1.upl16();
|
||||
GSVector4i z1 = xyzf1.yyyy();
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
GSVector4i p0 = xy0.blend16<0xf0>(z0.uph32(xyzf1));
|
||||
GSVector4i p1 = xy1.blend16<0xf0>(z1.uph32(xyzf1));
|
||||
|
||||
pmin = pmin.min_u32(p0.min_u32(p1));
|
||||
pmax = pmax.max_u32(p0.max_u32(p1));
|
||||
|
||||
#else
|
||||
|
||||
GSVector4 p0 = GSVector4(xy0.upl64(z0.srl32(1).upl32(xyzf1.wwww())));
|
||||
GSVector4 p1 = GSVector4(xy1.upl64(z1.srl32(1).upl32(xyzf1.wwww())));
|
||||
|
||||
pmin = pmin.min(p0.min(p1));
|
||||
pmax = pmax.max(p0.max(p1));
|
||||
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#if _M_SSE >= 0x401
|
||||
|
||||
pmin = pmin.blend16<0x30>(pmin.srl32(1));
|
||||
pmax = pmax.blend16<0x30>(pmax.srl32(1));
|
||||
|
||||
#endif
|
||||
|
||||
GSVector4 o(context->XYOFFSET);
|
||||
GSVector4 s(1.0f / 16, 1.0f / 16, 2.0f, 1.0f);
|
||||
|
||||
|
|
|
@ -5,45 +5,37 @@
|
|||
|
||||
#ifndef GS_IIP
|
||||
#define GS_IIP 0
|
||||
#define GS_PRIM 3
|
||||
#define GS_PRIM 2
|
||||
#endif
|
||||
|
||||
#ifndef PS_BATCH_SIZE
|
||||
#define PS_BATCH_SIZE 2048
|
||||
#define PS_FPSM PSM_PSMCT32
|
||||
#define PS_ZPSM PSM_PSMZ16
|
||||
#endif
|
||||
|
||||
//
|
||||
globallycoherent RWByteAddressBuffer VideoMemory : register(u0);
|
||||
|
||||
//globallycoherent RWTexture2D<uint> VideoMemory : register(u0); // 8192 * 512 R8_UINT
|
||||
|
||||
Buffer<int2> FZBufRow : register(t0);
|
||||
Buffer<int2> FZBufCol : register(t1);
|
||||
Texture2D<float4> Palette : register(t2);
|
||||
Texture2D<float4> TextureL0 : register(t3);
|
||||
Texture2D<float4> TextureL1 : register(t4);
|
||||
Texture2D<float4> TextureL2 : register(t5);
|
||||
Texture2D<float4> TextureL3 : register(t6);
|
||||
Texture2D<float4> TextureL4 : register(t7);
|
||||
Texture2D<float4> TextureL5 : register(t8);
|
||||
Texture2D<float4> TextureL6 : register(t9);
|
||||
|
||||
cbuffer VSConstantBuffer : register(c0)
|
||||
{
|
||||
float4 VertexScale;
|
||||
float4 VertexOffset;
|
||||
};
|
||||
|
||||
cbuffer PSConstantBuffer : register(c0)
|
||||
{
|
||||
// TODO
|
||||
};
|
||||
#define PSM_PSMCT32 0
|
||||
#define PSM_PSMCT24 1
|
||||
#define PSM_PSMCT16 2
|
||||
#define PSM_PSMCT16S 10
|
||||
#define PSM_PSMT8 19
|
||||
#define PSM_PSMT4 20
|
||||
#define PSM_PSMT8H 27
|
||||
#define PSM_PSMT4HL 36
|
||||
#define PSM_PSMT4HH 44
|
||||
#define PSM_PSMZ32 48
|
||||
#define PSM_PSMZ24 49
|
||||
#define PSM_PSMZ16 50
|
||||
#define PSM_PSMZ16S 58
|
||||
|
||||
struct VS_INPUT
|
||||
{
|
||||
float2 st : TEXCOORD0;
|
||||
float4 c : COLOR0;
|
||||
float q : TEXCOORD1;
|
||||
uint2 p : POSITION0;
|
||||
uint z : POSITION1;
|
||||
float2 st : TEXCOORD0;
|
||||
float q : TEXCOORD1;
|
||||
uint2 uv : TEXCOORD2;
|
||||
float4 c : COLOR0;
|
||||
float4 f : COLOR1;
|
||||
};
|
||||
|
||||
|
@ -64,13 +56,38 @@ struct GS_OUTPUT
|
|||
uint id : SV_PrimitiveID;
|
||||
};
|
||||
|
||||
// Vertex-stage constants. vs_main applies them to the incoming position as
// position * VertexScale - VertexOffset.
// NOTE(review): constant buffers are normally bound through b-registers;
// confirm that register(c0) is accepted by the targeted shader profile.
cbuffer VSConstantBuffer : register(c0)
|
||||
{
|
||||
float4 VertexScale;
|
||||
float4 VertexOffset;
|
||||
};
|
||||
|
||||
// Pixel-stage constants.
// WriteMask is only referenced from commented-out code in ps_main1, where
// .x masks the colour word and .y masks the depth word before they are written back.
cbuffer PSConstantBuffer : register(c0)
|
||||
{
|
||||
uint2 WriteMask;
|
||||
};
|
||||
|
||||
// One node of the per-pixel fragment list held in FragmentLinkBuffer.
struct FragmentLinkItem
{
	uint c;    // packed colour of the fragment
	uint z;    // depth of the fragment
	uint id;   // primitive id, used as the sort key in ps_main1
	uint next; // index of the next node for the same pixel, 0 ends the list
};
|
||||
|
||||
// Raw video memory; read and written through ReadPixel / WritePixel.
RWByteAddressBuffer VideoMemory : register(u0);
|
||||
// Pool of fragment list nodes; ps_main0 allocates slots with IncrementCounter(), ps_main1 walks them.
RWStructuredBuffer<FragmentLinkItem> FragmentLinkBuffer : register(u1);
|
||||
// Per-pixel list heads, one uint at byte offset ((y << 11) + x) * 4.
RWByteAddressBuffer StartOffsetBuffer : register(u2);
|
||||
//RWTexture2D<uint> VideoMemory : register(u2); // 8192 * 512 R8_UINT
|
||||
|
||||
// Row / column offset tables; ps_main1 adds FZRowOffset[y] and FZColOffset[x] and
// uses .x of the sum for the colour address and .y for the depth address.
Buffer<int2> FZRowOffset : register(t0);
|
||||
Buffer<int2> FZColOffset : register(t1);
|
||||
Texture2D<float4> Palette : register(t2);
|
||||
Texture2D<float4> Texture : register(t3);
|
||||
|
||||
VS_OUTPUT vs_main(VS_INPUT input)
|
||||
{
|
||||
VS_OUTPUT output;
|
||||
|
||||
output.p = float4(input.p, 0.0f, 0.0f) * VertexScale - VertexOffset;
|
||||
|
||||
output.z = float2(input.z & 0xffff, input.z >> 16);
|
||||
output.z = float2(input.z & 0xffff, input.z >> 16); // TODO: min(input.z, 0xffffff00) ?
|
||||
|
||||
if(VS_TME)
|
||||
{
|
||||
|
@ -97,7 +114,6 @@ VS_OUTPUT vs_main(VS_INPUT input)
|
|||
return output;
|
||||
}
|
||||
|
||||
|
||||
#if GS_PRIM == 0
|
||||
|
||||
[maxvertexcount(1)]
|
||||
|
@ -119,6 +135,7 @@ void gs_main(point VS_OUTPUT input[1], inout PointStream<GS_OUTPUT> stream, uint
|
|||
[maxvertexcount(2)]
|
||||
void gs_main(line VS_OUTPUT input[2], inout LineStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
|
||||
{
|
||||
[unroll]
|
||||
for(int i = 0; i < 2; i++)
|
||||
{
|
||||
GS_OUTPUT output;
|
||||
|
@ -142,6 +159,7 @@ void gs_main(line VS_OUTPUT input[2], inout LineStream<GS_OUTPUT> stream, uint i
|
|||
[maxvertexcount(3)]
|
||||
void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
|
||||
{
|
||||
[unroll]
|
||||
for(int i = 0; i < 3; i++)
|
||||
{
|
||||
GS_OUTPUT output;
|
||||
|
@ -153,7 +171,7 @@ void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream<GS_OUTPUT> stream
|
|||
output.id = id;
|
||||
|
||||
#if GS_IIP == 0
|
||||
if(i != 1) output.c = input[2].c;
|
||||
if(i != 2) output.c = input[2].c;
|
||||
#endif
|
||||
|
||||
stream.Append(output);
|
||||
|
@ -200,71 +218,166 @@ void gs_main(line VS_OUTPUT input[2], inout TriangleStream<GS_OUTPUT> stream, ui
|
|||
|
||||
#endif
|
||||
|
||||
uint CompressColor(float4 f)
|
||||
uint CompressColor32(float4 f)
|
||||
{
|
||||
// is there a faster way?
|
||||
|
||||
uint4 c = (uint4)(f * 0xff) << uint4(0, 8, 16, 24);
|
||||
|
||||
return c.r | c.g | c.b | c.a;
|
||||
}
|
||||
|
||||
void ps_main(GS_OUTPUT input)
|
||||
// Expands a 16-bit 5:5:5:1 pixel (low 16 bits of c) into a 32-bit 8:8:8:8 value.
// Every source field is moved so that its most significant bit lands on the most
// significant bit of the destination byte; the remaining low bits stay zero.
uint DecompressColor16(uint c)
{
	uint r = (c & 0x001f) << 3;  // bits  0-4  ->  3-7
	uint g = (c & 0x03e0) << 6;  // bits  5-9  -> 11-15
	uint b = (c & 0x7c00) << 9;  // bits 10-14 -> 19-23
	uint a = (c & 0x8000) << 16; // bit  15    -> 31 (a shift of 15 would end up on bit 30)

	return r | g | b | a;
}
|
||||
|
||||
// Reads the pixel stored at byte address addr in VideoMemory.
//
// addr may point at the upper half of a 32-bit word (bit 1 set); in that case the
// containing word is loaded and shifted down by 16. The bits above the pixel are
// not masked off, callers that need a 16-bit value must mask the result.
uint ReadPixel(uint addr)
{
	uint shift = (addr & 2) << 3; // 0 or 16

	// byte address buffer loads require an address that is a multiple of 4
	return VideoMemory.Load(addr & ~3) >> shift;
}
|
||||
|
||||
// Writes value to the pixel at byte address addr in VideoMemory.
//
// addr  - byte address of the pixel, for 16-bit formats bit 1 selects the half-word
// value - pixel value, only the low 16 bits are used by the 16-bit formats
// psm   - one of the PSM_* format constants, other values write nothing
void WritePixel(uint addr, uint value, uint psm)
{
	// byte address buffer accesses require an address that is a multiple of 4
	uint word = addr & ~3;

	uint tmp;

	switch(psm)
	{
	case PSM_PSMCT32:
	case PSM_PSMZ32:
	case PSM_PSMCT24:
	case PSM_PSMZ24:
		VideoMemory.Store(word, value);
		break;
	case PSM_PSMCT16:
	case PSM_PSMCT16S:
	case PSM_PSMZ16:
	case PSM_PSMZ16S:
		tmp = (addr & 2) << 3; // 0 or 16
		// xor-ing with (new ^ old) restricted to the target half replaces that half
		// and leaves the other half of the word untouched
		value = ((value << tmp) ^ VideoMemory.Load(word)) & (0x0000ffff << tmp);
		VideoMemory.InterlockedXor(word, value, tmp); // tmp receives the previous word, unused
		break;
	}
}
|
||||
|
||||
void ps_main0(GS_OUTPUT input)
|
||||
{
|
||||
uint x = (uint)input.p.x;
|
||||
uint y = (uint)input.p.y;
|
||||
|
||||
uint2 addr = FZBufRow[y] + FZBufCol[x]; // 16-bit address
|
||||
uint tail = FragmentLinkBuffer.IncrementCounter();
|
||||
|
||||
uint2 unaligned = addr.xy & 1; // 16-bit formats can address into the middle of an uint (smallest word size for VideoMemory)
|
||||
uint index = (y << 11) + x;
|
||||
uint next = 0;
|
||||
|
||||
addr = (addr & ~1) * 2;
|
||||
StartOffsetBuffer.InterlockedExchange(index * 4, tail, next);
|
||||
|
||||
//DeviceMemoryBarrier();
|
||||
|
||||
uint zd = VideoMemory.Load(addr.y);
|
||||
|
||||
if(z < zd) discard;
|
||||
|
||||
VideoMemory.Store(addr.y, z);
|
||||
VideoMemory.Store(addr.x, c);
|
||||
|
||||
/*
|
||||
addr <<= 1;
|
||||
FragmentLinkItem item;
|
||||
|
||||
uint2 fa0 = uint2(addr.x & 0x1fff, addr.x >> 13);
|
||||
uint2 fa1 = fa0 + uint2(1, 0);
|
||||
uint2 fa2 = fa0 + uint2(2, 0);
|
||||
uint2 fa3 = fa0 + uint2(3, 0);
|
||||
// TODO: preprocess color (tfx, alpha test), z-test
|
||||
|
||||
uint2 za0 = uint2(addr.y & 0x1fff, addr.y >> 13);
|
||||
uint2 za1 = za0 + uint2(1, 0);
|
||||
uint2 za2 = za0 + uint2(2, 0);
|
||||
uint2 za3 = za0 + uint2(3, 0);
|
||||
item.c = CompressColor32(input.c);
|
||||
item.z = (uint)(input.z.y * 0x10000 + input.z.x);
|
||||
item.id = input.id;
|
||||
item.next = next;
|
||||
|
||||
DeviceMemoryBarrier();
|
||||
|
||||
uint zd =
|
||||
(VideoMemory[za0] << 0) |
|
||||
(VideoMemory[za1] << 8) |
|
||||
(VideoMemory[za2] << 16) |
|
||||
(VideoMemory[za3] << 24);
|
||||
|
||||
if(zd >= z) discard;
|
||||
|
||||
VideoMemory[za0] = (z >> 0) & 0xff;
|
||||
VideoMemory[za1] = (z >> 8) & 0xff;
|
||||
VideoMemory[za2] = (z >> 16) & 0xff;
|
||||
VideoMemory[za3] = (z >> 24) & 0xff;
|
||||
|
||||
DeviceMemoryBarrier();
|
||||
|
||||
VideoMemory[fa0] = (c >> 0) & 0xff;
|
||||
VideoMemory[fa1] = (c >> 8) & 0xff;
|
||||
VideoMemory[fa2] = (c >> 16) & 0xff;
|
||||
VideoMemory[fa3] = (c >> 24) & 0xff;
|
||||
*/
|
||||
FragmentLinkBuffer[tail] = item;
|
||||
}
|
||||
|
||||
// Second pass: resolves the fragment list collected for one pixel.
//
// 1. detaches the list head from StartOffsetBuffer and gathers the node indices,
// 2. sorts the indices by primitive id with a bitonic network,
// 3. walks the gathered fragments and keeps the one with the largest z,
// 4. writes the resulting colour and depth back with WritePixel.
void ps_main1(GS_OUTPUT input)
{
	uint2 pos = (uint2)input.p.xy;

	// sort fragments

	uint StartOffsetIndex = (pos.y << 11) + pos.x;

	int index[PS_BATCH_SIZE];
	int count = 0;

	uint next = StartOffsetBuffer.Load(StartOffsetIndex * 4);

	StartOffsetBuffer.Store(StartOffsetIndex * 4, 0);

	// never gather more nodes than index[] can hold, the rest of the list is dropped

	[allow_uav_condition]
	while(next != 0 && count < PS_BATCH_SIZE)
	{
		index[count++] = next;

		next = FragmentLinkBuffer[next].next;
	}

	// smallest power of two >= count (1 for an empty list), computed with integers
	// so that it cannot overshoot on exact powers of two; stays within index[] as
	// long as PS_BATCH_SIZE itself is a power of two

	int N2 = 1;

	while(N2 < count)
	{
		N2 <<= 1;
	}

	// NOTE(review): the padding slots refer to node 0, so FragmentLinkBuffer[0].id
	// takes part in the sort below - confirm whether a dedicated sentinel is needed

	[allow_uav_condition]
	for(int i = count; i < N2; i++)
	{
		index[i] = 0;
	}

	[allow_uav_condition]
	for(int k = 2; k <= N2; k = 2 * k)
	{
		[allow_uav_condition]
		for(int j = k >> 1; j > 0 ; j = j >> 1)
		{
			[allow_uav_condition]
			for(int i = 0; i < N2; i++)
			{
				uint i_id = FragmentLinkBuffer[index[i]].id;

				int ixj = i ^ j;

				if(ixj > i)
				{
					uint ixj_id = FragmentLinkBuffer[index[ixj]].id;

					// ascending run
					if((i & k) == 0 && i_id > ixj_id)
					{
						int temp = index[i];
						index[i] = index[ixj];
						index[ixj] = temp;
					}

					// descending run
					if((i & k) != 0 && i_id < ixj_id)
					{
						int temp = index[i];
						index[i] = index[ixj];
						index[ixj] = temp;
					}
				}
			}
		}
	}

	uint2 addr = (uint2)(FZRowOffset[pos.y] + FZColOffset[pos.x]) << 1;

	uint dc = ReadPixel(addr.x);
	uint dz = ReadPixel(addr.y);

	uint sc = dc;
	uint sz = dz;

	[allow_uav_condition]
	while(--count >= 0)
	{
		FragmentLinkItem f = FragmentLinkBuffer[index[count]];

		// TODO

		if(sz < f.z)
		{
			sc = f.c;
			sz = f.z;
		}
	}

	uint c = sc; // (dc & ~WriteMask.x) | (sc & WriteMask.x);
	uint z = 0;//sz; //(dz & ~WriteMask.y) | (sz & WriteMask.y);

	WritePixel(addr.x, c, PS_FPSM);
	WritePixel(addr.y, z, PS_ZPSM);
}
|
||||
|
|
Loading…
Reference in New Issue