mirror of https://github.com/PCSX2/pcsx2.git
gsdx: remove DirectCompute backend
It hasn't been accessible since OpenCL was added, and it has barely been updated since it was introduced.
This commit is contained in:
parent
15efe69e46
commit
8f6a3d9fe8
|
@ -148,7 +148,6 @@ set(GSdxHeaders
|
|||
GSPng.h
|
||||
GSRasterizer.h
|
||||
GSRendererCL.h
|
||||
GSRendererCS.h
|
||||
GSRenderer.h
|
||||
GSRendererHW.h
|
||||
GSRendererNull.h
|
||||
|
@ -208,7 +207,6 @@ if(Windows)
|
|||
GSDevice9.cpp
|
||||
GSDeviceDX.cpp
|
||||
GSDialog.cpp
|
||||
GSRendererCS.cpp
|
||||
GSRendererDX11.cpp
|
||||
GSRendererDX9.cpp
|
||||
GSRendererDX.cpp
|
||||
|
|
|
@ -38,7 +38,6 @@
|
|||
#include "GSDevice11.h"
|
||||
#include "GSWndDX.h"
|
||||
#include "GSWndWGL.h"
|
||||
#include "GSRendererCS.h"
|
||||
#include "GSSettingsDlg.h"
|
||||
|
||||
static HRESULT s_hr = E_FAIL;
|
||||
|
|
|
@ -1211,42 +1211,6 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
|
||||
{
|
||||
if(m_state.cs_srv[i] != srv)
|
||||
{
|
||||
m_state.cs_srv[i] = srv;
|
||||
|
||||
m_ctx->CSSetShaderResources(i, 1, &srv);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav)
|
||||
{
|
||||
uint32 counters[8];
|
||||
|
||||
memset(counters, 0, sizeof(counters));
|
||||
|
||||
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, counters);
|
||||
}
|
||||
|
||||
// Selects the compute shader `cs` and binds `cs_cb` as constant buffer 0.
// Each of the two is only sent to the device context when it differs from
// the value remembered in m_state.
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb)
{
	const bool shader_changed = m_state.cs != cs;

	if(shader_changed)
	{
		m_state.cs = cs;
		m_ctx->CSSetShader(cs, NULL, 0);
	}

	const bool cb_changed = m_state.cs_cb != cs_cb;

	if(cb_changed)
	{
		m_state.cs_cb = cs_cb;
		m_ctx->CSSetConstantBuffers(0, 1, &cs_cb);
	}
}
|
||||
|
||||
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
|
||||
{
|
||||
if(m_state.dss != dss || m_state.sref != sref)
|
||||
|
@ -1479,33 +1443,3 @@ void GSDevice11::CompileShader(const char* source, size_t size, const char* fn,
|
|||
throw GSDXRecoverableError();
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::CompileShader(const char* source, size_t size, const char *fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
std::vector<D3D_SHADER_MACRO> m;
|
||||
|
||||
PrepareShaderMacro(m, macro);
|
||||
|
||||
CComPtr<ID3DBlob> shader, error;
|
||||
|
||||
hr = s_pD3DCompile(source, size, fn, &m[0], s_old_d3d_compiler_dll ? nullptr : include, entry, m_shader.cs.c_str(), 0, 0, &shader, &error);
|
||||
|
||||
if(error)
|
||||
{
|
||||
printf("%s\n", (const char*)error->GetBufferPointer());
|
||||
}
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
throw GSDXRecoverableError();
|
||||
}
|
||||
|
||||
hr = m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, cs);
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
throw GSDXRecoverableError();
|
||||
}
|
||||
}
|
||||
|
|
|
@ -75,9 +75,6 @@ class GSDevice11 : public GSDeviceDX
|
|||
ID3D11PixelShader* ps;
|
||||
ID3D11Buffer* ps_cb;
|
||||
ID3D11SamplerState* ps_ss[3];
|
||||
ID3D11ShaderResourceView* cs_srv[16];
|
||||
ID3D11ComputeShader* cs;
|
||||
ID3D11Buffer* cs_cb;
|
||||
GSVector2i viewport;
|
||||
GSVector4i scissor;
|
||||
ID3D11DepthStencilState* dss;
|
||||
|
@ -215,9 +212,6 @@ public:
|
|||
void PSSetShaderResourceView(int i, ID3D11ShaderResourceView* srv);
|
||||
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
|
||||
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL);
|
||||
void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv);
|
||||
void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav);
|
||||
void CSSetShader(ID3D11ComputeShader* cs, ID3D11Buffer* cs_cb);
|
||||
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
|
||||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||
|
@ -239,6 +233,5 @@ public:
|
|||
void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
|
||||
void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11GeometryShader** gs, D3D11_SO_DECLARATION_ENTRY* layout, int count);
|
||||
void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11PixelShader** ps);
|
||||
void CompileShader(const char* source, size_t size, const char* fn, ID3DInclude *include, const char* entry, D3D_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
|
||||
};
|
||||
|
||||
|
|
|
@ -1,877 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSRendererCS.h"
|
||||
|
||||
#define PS_BATCH_SIZE 512
|
||||
|
||||
// Sets up CPU-side state only: forces native resolution, marks every video
// memory page as not valid, clears the output texture slots and allocates
// the 32-byte aligned scratch buffer (1024 * 1024 uint32 entries).
// Device objects are created later, in CreateDevice().
GSRendererCS::GSRendererCS()
	: GSRenderer()
{
	m_nativeres = true;

	for(size_t i = 0; i < countof(m_vm_valid); i++)
	{
		m_vm_valid[i] = 0;
	}

	for(size_t i = 0; i < countof(m_texture); i++)
	{
		m_texture[i] = NULL;
	}

	const size_t output_bytes = 1024 * 1024 * sizeof(uint32);

	m_output = (uint8*)_aligned_malloc(output_bytes, 32);
}
|
||||
|
||||
// Destroys the output textures and releases the aligned scratch buffer
// allocated by the constructor.
GSRendererCS::~GSRendererCS()
{
	for(auto* tex : m_texture)
	{
		delete tex;
	}

	_aligned_free(m_output);
}
|
||||
|
||||
// Creates every device object the renderer needs, in this order:
//   - depth-stencil and blend states with everything disabled
//   - a point sampler
//   - the structured "link" buffer with its UAV and SRV
//   - the raw "start offset" buffer with its UAV and SRV
//   - the 4 MB raw video memory buffer with its UAV
//   - a one-page staging buffer for cpu<->gpu copies
//   - the VS/PS constant buffers and the first-pass pixel shader
//
// Returns false when the base class fails, when the device reports a feature
// level below 11_0, or when any resource creation / shader compile fails.
// Objects created before a failure stay in their members.
bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
{
	if(!__super::CreateDevice(dev_unk))
		return false;

	HRESULT hr;

	D3D11_DEPTH_STENCIL_DESC dsd;
	D3D11_BLEND_DESC bsd;
	D3D11_SAMPLER_DESC sd;
	D3D11_BUFFER_DESC bd;
	D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
	D3D11_SHADER_RESOURCE_VIEW_DESC srvd;

	D3D_FEATURE_LEVEL level;

	((GSDeviceDX*)dev_unk)->GetFeatureLevel(level);

	if(level < D3D_FEATURE_LEVEL_11_0)
		return false;

	GSDevice11* dev = (GSDevice11*)dev_unk;

	ID3D11DeviceContext* ctx = *dev;

	// empty depth stencil state

	memset(&dsd, 0, sizeof(dsd));

	dsd.StencilEnable = false;
	dsd.DepthEnable = false;

	hr = (*dev)->CreateDepthStencilState(&dsd, &m_dss);

	if(FAILED(hr)) return false;

	// empty blend state

	memset(&bsd, 0, sizeof(bsd));

	bsd.RenderTarget[0].BlendEnable = false;

	hr = (*dev)->CreateBlendState(&bsd, &m_bs);

	if(FAILED(hr)) return false;

	// point sampler

	memset(&sd, 0, sizeof(sd));

	sd.Filter = D3D11_FILTER_MIN_MAG_MIP_POINT;
	sd.AddressU = D3D11_TEXTURE_ADDRESS_CLAMP;
	sd.AddressV = D3D11_TEXTURE_ADDRESS_CLAMP;
	sd.AddressW = D3D11_TEXTURE_ADDRESS_CLAMP;
	sd.MinLOD = -FLT_MAX;
	sd.MaxLOD = FLT_MAX;
	sd.MaxAnisotropy = theApp.GetConfigI("MaxAnisotropy");
	sd.ComparisonFunc = D3D11_COMPARISON_NEVER;

	hr = (*dev)->CreateSamplerState(&sd, &m_ss);

	if(FAILED(hr)) return false;

	// link buffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = 256 << 20; // 256 MB w00t
	bd.StructureByteStride = sizeof(uint32) * 4; // c, z, id, next
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_lb);

	// Check the result before touching m_lb: the buffer must exist before its
	// first element can be uploaded.
	if(FAILED(hr)) return false;

	{
		// Contents of the first element (c, z, id, next).
		uint32 data[] = {0, 0, 0xffffffff, 0};

		D3D11_BOX box;
		memset(&box, 0, sizeof(box));
		box.right = sizeof(data);
		box.bottom = 1;
		box.back = 1;

		ctx->UpdateSubresource(m_lb, 0, &box, data, 0, 0);
	}

	memset(&uavd, 0, sizeof(uavd));

	uavd.Format = DXGI_FORMAT_UNKNOWN;
	uavd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride;
	uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_COUNTER;
	uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;

	hr = (*dev)->CreateUnorderedAccessView(m_lb, &uavd, &m_lb_uav);

	if(FAILED(hr)) return false;

	memset(&srvd, 0, sizeof(srvd));

	srvd.Format = DXGI_FORMAT_UNKNOWN;
	srvd.Buffer.NumElements = bd.ByteWidth / bd.StructureByteStride;
	srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;

	hr = (*dev)->CreateShaderResourceView(m_lb, &srvd, &m_lb_srv);

	if(FAILED(hr)) return false;

	// start offset buffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(uint32) * 2048 * 2048; // index
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS | D3D11_BIND_SHADER_RESOURCE;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_sob);

	if(FAILED(hr)) return false;

	memset(&uavd, 0, sizeof(uavd));

	uavd.Format = DXGI_FORMAT_R32_TYPELESS;
	uavd.Buffer.NumElements = bd.ByteWidth / sizeof(uint32);
	uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
	uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;

	hr = (*dev)->CreateUnorderedAccessView(m_sob, &uavd, &m_sob_uav);

	if(FAILED(hr)) return false;

	memset(&srvd, 0, sizeof(srvd));

	srvd.Format = DXGI_FORMAT_R32_TYPELESS;
	srvd.BufferEx.NumElements = bd.ByteWidth / sizeof(uint32);
	srvd.BufferEx.Flags = D3D11_BUFFEREX_SRV_FLAG_RAW;
	srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFEREX;

	hr = (*dev)->CreateShaderResourceView(m_sob, &srvd, &m_sob_srv);

	if(FAILED(hr)) return false;

	// ClearUnorderedAccessViewUint takes an array of four values, so a
	// four-element array is passed rather than the address of one scalar.
	const uint32 zeros[4] = {0, 0, 0, 0};

	ctx->ClearUnorderedAccessViewUint(m_sob_uav, zeros); // initial clear, next time Draw should restore it in Step 2

	// video memory (4MB)

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = 4 * 1024 * 1024;
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_vm);

	if(FAILED(hr)) return false;

	memset(&uavd, 0, sizeof(uavd));

	uavd.Format = DXGI_FORMAT_R32_TYPELESS;
	uavd.Buffer.FirstElement = 0;
	uavd.Buffer.NumElements = 1024 * 1024;
	uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
	uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;

	hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);

	if(FAILED(hr)) return false;
	/*
	// Disabled alternative: keep video memory in a texture instead of a buffer.
	D3D11_TEXTURE2D_DESC td;

	memset(&td, 0, sizeof(td));

	td.Width = PAGE_SIZE;
	td.Height = MAX_PAGES;
	td.Format = DXGI_FORMAT_R8_UINT;
	td.MipLevels = 1;
	td.ArraySize = 1;
	td.SampleDesc.Count = 1;
	td.SampleDesc.Quality = 0;
	td.Usage = D3D11_USAGE_DEFAULT;
	td.BindFlags = D3D11_BIND_UNORDERED_ACCESS;

	hr = (*dev)->CreateTexture2D(&td, NULL, &m_vm);

	if(FAILED(hr)) return false;

	memset(&uavd, 0, sizeof(uavd));

	uavd.Format = DXGI_FORMAT_R8_UINT;
	uavd.ViewDimension = D3D11_UAV_DIMENSION_TEXTURE2D;

	hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);

	if(FAILED(hr)) return false;
	*/
	// one page, for copying between cpu<->gpu

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = PAGE_SIZE;
	bd.Usage = D3D11_USAGE_STAGING;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_pb);

	if(FAILED(hr)) return false;
	/*
	// Disabled alternative: staging texture matching the texture-based m_vm.
	D3D11_TEXTURE2D_DESC td;

	memset(&td, 0, sizeof(td));

	td.Width = PAGE_SIZE;
	td.Height = 1;
	td.Format = DXGI_FORMAT_R8_UINT;
	td.MipLevels = 1;
	td.ArraySize = 1;
	td.SampleDesc.Count = 1;
	td.SampleDesc.Quality = 0;
	td.Usage = D3D11_USAGE_STAGING;
	td.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;

	hr = (*dev)->CreateTexture2D(&td, NULL, &m_pb);

	if(FAILED(hr)) return false;
	*/
	// VSConstantBuffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(VSConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_vs_cb);

	if(FAILED(hr)) return false;

	// PS

	D3D_SHADER_MACRO macro[] =
	{
		{NULL, NULL},
	};

	try
	{
		std::vector<char> shader;
		theApp.LoadResource(IDR_CS_FX, shader);
		dev->CompileShader(shader.data(), shader.size(), "cs.fx", nullptr, "ps_main0", macro, &m_ps0);
	}
	catch(const GSDXRecoverableError&)
	{
		// A failed compile is reported as an ordinary creation failure.
		return false;
	}

	// PSConstantBuffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(PSConstantBuffer);
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_CONSTANT_BUFFER;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_ps_cb);

	if(FAILED(hr)) return false;

	//

	return true;
}
|
||||
|
||||
void GSRendererCS::ResetDevice()
|
||||
{
|
||||
for(size_t i = 0; i < countof(m_texture); i++)
|
||||
{
|
||||
delete m_texture[i];
|
||||
|
||||
m_texture[i] = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererCS::VSync(int field)
|
||||
{
|
||||
__super::VSync(field);
|
||||
|
||||
//printf("%lld\n", m_perfmon.GetFrame());
|
||||
}
|
||||
|
||||
// Produces the output texture for display circuit `i`.
//
// The frame is read back from the GPU copy of video memory (Read), converted
// on the CPU into m_output through the format's rtx function, then uploaded
// into m_texture[i]. `y_offset` is not written by this implementation.
//
// Returns m_texture[i] in all cases; when ResizeTexture() reports failure the
// slot is returned as-is without being refreshed.
//
// NOTE(review): m_output is addressed with a fixed pitch of 1024 * 4 bytes
// while the width is FBW * 64 — confirm the width cannot exceed 1024 pixels.
GSTexture* GSRendererCS::GetOutput(int i, int& y_offset)
{
	// TODO: create a compute shader which unswizzles the frame from m_vm to the output texture

	const GSRegDISPFB& DISPFB = m_regs->DISP[i].DISPFB;

	int width = DISPFB.FBW * 64;
	int height = GetFramebufferHeight();

	// TODO: round up bottom

	if(!m_dev->ResizeTexture(&m_texture[i], width, height))
	{
		return m_texture[i];
	}

	const GSLocalMemory::psm_t& psm = GSLocalMemory::m_psm[DISPFB.PSM];

	GSVector4i frame(0, 0, width, height);
	GSVector4i aligned = frame.ralign<Align_Outside>(psm.bs);

	GSOffset* off = m_mem.GetOffset(DISPFB.Block(), DISPFB.FBW, DISPFB.PSM);

	// Pull the covered pages back from the GPU before the CPU-side conversion.
	Read(off, aligned, false);

	(m_mem.*psm.rtx)(off, aligned, m_output, 1024 * 4, m_env.TEXA);

	m_texture[i]->Update(frame, m_output, 1024 * 4);

	if(s_dump)
	{
		const bool save_now = s_save && s_n >= s_saven;

		if(save_now)
		{
			m_texture[i]->Save(format("c:\\temp1\\_%05d_f%lld_fr%d_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), i, (int)DISPFB.Block(), (int)DISPFB.PSM));
		}

		s_n++;
	}

	return m_texture[i];
}
|
||||
|
||||
void GSRendererCS::Draw()
|
||||
{
|
||||
GSDrawingEnvironment& env = m_env;
|
||||
GSDrawingContext* context = m_context;
|
||||
|
||||
GSVector2i rtsize(2048, 2048);
|
||||
GSVector4i scissor = GSVector4i(context->scissor.in).rintersect(GSVector4i(rtsize).zwxy());
|
||||
GSVector4i bbox = GSVector4i(m_vt.m_min.p.floor().xyxy(m_vt.m_max.p.ceil()));
|
||||
GSVector4i r = bbox.rintersect(scissor);
|
||||
|
||||
uint32 fm = context->FRAME.FBMSK;
|
||||
uint32 zm = context->ZBUF.ZMSK || context->TEST.ZTE == 0 ? 0xffffffff : 0;
|
||||
|
||||
if(fm != 0xffffffff)
|
||||
{
|
||||
Write(context->offset.fb, r);
|
||||
|
||||
// TODO: m_tc->InvalidateVideoMem(context->offset.fb, r, false);
|
||||
}
|
||||
|
||||
if(zm != 0xffffffff)
|
||||
{
|
||||
Write(context->offset.zb, r);
|
||||
|
||||
// TODO: m_tc->InvalidateVideoMem(context->offset.zb, r, false);
|
||||
}
|
||||
|
||||
// TODO: if(24-bit) fm/zm |= 0xff000000;
|
||||
|
||||
if(PRIM->TME)
|
||||
{
|
||||
m_mem.m_clut.Read32(context->TEX0, env.TEXA);
|
||||
|
||||
GSVector4i r;
|
||||
|
||||
GetTextureMinMax(r, context->TEX0, context->CLAMP, m_vt.IsLinear());
|
||||
|
||||
// TODO: unswizzle pages of r to a texture, check m_vm_valid, bit not set cpu->gpu, set gpu->gpu
|
||||
|
||||
// TODO: Write transfer should directly write to m_vm, then Read/Write syncing won't be necessary, clut must be updated with the gpu also
|
||||
|
||||
// TODO: tex = m_tc->LookupSource(context->TEX0, env.TEXA, r);
|
||||
|
||||
// if(!tex) return;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
GSDevice11* dev = (GSDevice11*)m_dev;
|
||||
|
||||
ID3D11DeviceContext* ctx = *dev;
|
||||
|
||||
//
|
||||
|
||||
dev->BeginScene();
|
||||
|
||||
// SetupOM
|
||||
|
||||
dev->OMSetDepthStencilState(m_dss, 0);
|
||||
dev->OMSetBlendState(m_bs, 0);
|
||||
|
||||
ID3D11UnorderedAccessView* uavs[] = {m_vm_uav, m_lb_uav, m_sob_uav};
|
||||
uint32 counters[] = {1, 0, 0};
|
||||
|
||||
dev->OMSetRenderTargets(rtsize, countof(uavs), uavs, counters, &scissor);
|
||||
|
||||
// SetupIA
|
||||
|
||||
D3D11_PRIMITIVE_TOPOLOGY topology;
|
||||
|
||||
switch(m_vt.m_primclass)
|
||||
{
|
||||
case GS_POINT_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;
|
||||
break;
|
||||
case GS_LINE_CLASS:
|
||||
case GS_SPRITE_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;
|
||||
break;
|
||||
case GS_TRIANGLE_CLASS:
|
||||
topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST;
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
GSVector4i r2 = bbox.add32(GSVector4i(-1, -1, 1, 1)).rintersect(scissor);
|
||||
|
||||
m_vertex.buff[m_vertex.next + 0].XYZ.X = (uint16)(context->XYOFFSET.OFX + (r2.left << 4));
|
||||
m_vertex.buff[m_vertex.next + 0].XYZ.Y = (uint16)(context->XYOFFSET.OFY + (r2.top << 4));
|
||||
m_vertex.buff[m_vertex.next + 1].XYZ.X = (uint16)(context->XYOFFSET.OFX + (r2.right << 4));
|
||||
m_vertex.buff[m_vertex.next + 1].XYZ.Y = (uint16)(context->XYOFFSET.OFY + (r2.bottom << 4));
|
||||
|
||||
m_index.buff[m_index.tail + 0] = m_vertex.next + 0;
|
||||
m_index.buff[m_index.tail + 1] = m_vertex.next + 1;
|
||||
|
||||
dev->IASetVertexBuffer(m_vertex.buff, sizeof(GSVertex), m_vertex.next + 2);
|
||||
dev->IASetIndexBuffer(m_index.buff, m_index.tail + 2);
|
||||
|
||||
// SetupVS
|
||||
|
||||
VSSelector vs_sel;
|
||||
|
||||
vs_sel.tme = PRIM->TME;
|
||||
vs_sel.fst = PRIM->FST;
|
||||
|
||||
VSConstantBuffer vs_cb;
|
||||
|
||||
float sx = 2.0f / (rtsize.x << 4);
|
||||
float sy = 2.0f / (rtsize.y << 4);
|
||||
//float sx = 1.0f / 16;
|
||||
//float sy = 1.0f / 16;
|
||||
float ox = (float)(int)context->XYOFFSET.OFX;
|
||||
float oy = (float)(int)context->XYOFFSET.OFY;
|
||||
|
||||
vs_cb.VertexScale = GSVector4(sx, -sy, 0.0f, 0.0f);
|
||||
vs_cb.VertexOffset = GSVector4(ox * sx + 1, -(oy * sy + 1), 0.0f, -1.0f);
|
||||
//vs_cb.VertexScale = GSVector4(sx, sy, 0.0f, 0.0f);
|
||||
//vs_cb.VertexOffset = GSVector4(ox * sx, oy * sy, 0.0f, -1.0f);
|
||||
|
||||
{
|
||||
GSVertexShader11 vs;
|
||||
|
||||
auto i = std::as_const(m_vs).find(vs_sel);
|
||||
|
||||
if(i != m_vs.end())
|
||||
{
|
||||
vs = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string str[2];
|
||||
|
||||
str[0] = format("%d", vs_sel.tme);
|
||||
str[1] = format("%d", vs_sel.fst);
|
||||
|
||||
D3D_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"VS_TME", str[0].c_str()},
|
||||
{"VS_FST", str[1].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
D3D11_INPUT_ELEMENT_DESC layout[] =
|
||||
{
|
||||
{"TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 0, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 8, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"TEXCOORD", 1, DXGI_FORMAT_R32_FLOAT, 0, 12, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 0, DXGI_FORMAT_R16G16_UINT, 0, 16, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"POSITION", 1, DXGI_FORMAT_R32_UINT, 0, 20, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"TEXCOORD", 2, DXGI_FORMAT_R16G16_UINT, 0, 24, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
{"COLOR", 1, DXGI_FORMAT_R8G8B8A8_UNORM, 0, 28, D3D11_INPUT_PER_VERTEX_DATA, 0},
|
||||
};
|
||||
|
||||
std::vector<char> shader;
|
||||
theApp.LoadResource(IDR_CS_FX, shader);
|
||||
dev->CompileShader(shader.data(), shader.size(), "cs.fx", nullptr, "vs_main", macro, &vs.vs, layout, countof(layout), &vs.il);
|
||||
|
||||
m_vs[vs_sel] = vs;
|
||||
}
|
||||
|
||||
ctx->UpdateSubresource(m_vs_cb, 0, NULL, &vs_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
dev->VSSetShader(vs.vs, m_vs_cb);
|
||||
|
||||
dev->IASetInputLayout(vs.il);
|
||||
}
|
||||
|
||||
// SetupGS
|
||||
|
||||
GSSelector gs_sel;
|
||||
|
||||
gs_sel.iip = PRIM->IIP;
|
||||
|
||||
CComPtr<ID3D11GeometryShader> gs[2];
|
||||
|
||||
for(int j = 0; j < 2; j++)
|
||||
{
|
||||
gs_sel.prim = j == 0 ? m_vt.m_primclass : GS_SPRITE_CLASS;
|
||||
|
||||
auto i = std::as_const(m_gs).find(gs_sel);
|
||||
|
||||
if(i != m_gs.end())
|
||||
{
|
||||
gs[j] = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string str[2];
|
||||
|
||||
str[0] = format("%d", gs_sel.iip);
|
||||
str[1] = format("%d", j == 0 ? gs_sel.prim : GS_SPRITE_CLASS);
|
||||
|
||||
D3D_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"GS_IIP", str[0].c_str()},
|
||||
{"GS_PRIM", str[1].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
std::vector<char> shader;
|
||||
theApp.LoadResource(IDR_CS_FX, shader);
|
||||
dev->CompileShader(shader.data(), shader.size(), "cs.fx", nullptr, "gs_main", macro, &gs[j]);
|
||||
|
||||
m_gs[gs_sel] = gs[j];
|
||||
}
|
||||
}
|
||||
|
||||
// SetupPS
|
||||
|
||||
dev->PSSetSamplerState(m_ss, NULL, NULL);
|
||||
|
||||
PSSelector ps_sel;
|
||||
|
||||
ps_sel.fpsm = context->FRAME.PSM;
|
||||
ps_sel.zpsm = context->ZBUF.PSM;
|
||||
|
||||
CComPtr<ID3D11PixelShader> ps[2] = {m_ps0, NULL};
|
||||
|
||||
auto i = std::as_const(m_ps1).find(ps_sel);
|
||||
|
||||
if(i != m_ps1.end())
|
||||
{
|
||||
ps[1] = i->second;
|
||||
}
|
||||
else
|
||||
{
|
||||
std::string str[15];
|
||||
|
||||
str[0] = format("%d", PS_BATCH_SIZE);
|
||||
str[1] = format("%d", context->FRAME.PSM);
|
||||
str[2] = format("%d", context->ZBUF.PSM);
|
||||
|
||||
D3D_SHADER_MACRO macro[] =
|
||||
{
|
||||
{"PS_BATCH_SIZE", str[0].c_str()},
|
||||
{"PS_FPSM", str[1].c_str()},
|
||||
{"PS_ZPSM", str[2].c_str()},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
std::vector<char> shader;
|
||||
theApp.LoadResource(IDR_CS_FX, shader);
|
||||
dev->CompileShader(shader.data(), shader.size(), "cs.fx", nullptr, "ps_main1", macro, &ps[1]);
|
||||
|
||||
m_ps1[ps_sel] = ps[1];
|
||||
}
|
||||
|
||||
PSConstantBuffer ps_cb;
|
||||
|
||||
ps_cb.fm = fm;
|
||||
ps_cb.zm = zm;
|
||||
|
||||
ctx->UpdateSubresource(m_ps_cb, 0, NULL, &ps_cb, 0, 0); // TODO: only update if changed
|
||||
|
||||
OffsetBuffer* fzbo = NULL;
|
||||
|
||||
GetOffsetBuffer(&fzbo);
|
||||
|
||||
dev->PSSetShaderResourceView(0, fzbo->row_srv);
|
||||
dev->PSSetShaderResourceView(1, fzbo->col_srv);
|
||||
// TODO: palette, texture
|
||||
|
||||
int step = PS_BATCH_SIZE * GSUtil::GetVertexCount(PRIM->PRIM);
|
||||
|
||||
for(uint32 i = 0; i < m_index.tail; i += step)
|
||||
{
|
||||
dev->IASetPrimitiveTopology(topology);
|
||||
dev->GSSetShader(gs[0]);
|
||||
dev->PSSetShader(ps[0], m_ps_cb);
|
||||
dev->DrawIndexedPrimitive(i, std::min<int>(m_index.tail - i, step));
|
||||
|
||||
dev->IASetPrimitiveTopology(D3D11_PRIMITIVE_TOPOLOGY_LINELIST);
|
||||
dev->GSSetShader(gs[1]);
|
||||
dev->PSSetShader(ps[1], m_ps_cb);
|
||||
dev->DrawIndexedPrimitive(m_index.tail, 2);
|
||||
|
||||
//printf("%d/%d, %d %d %d %d\n", i, m_index.tail, r2.x, r2.y, r2.z, r2.w);
|
||||
}
|
||||
|
||||
dev->EndScene();
|
||||
|
||||
if(0)
|
||||
{
|
||||
std::string s;
|
||||
/*
|
||||
s = format("c:\\temp1\\_%05d_f%lld_fb0_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0);
|
||||
m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
Read(m_mem.GetOffset(0, 16, PSM_PSMCT32), GSVector4i(0, 0, 1024, 1024), false);
|
||||
*/
|
||||
//
|
||||
if(fm != 0xffffffff) Read(context->offset.fb, r, false);
|
||||
//
|
||||
if(zm != 0xffffffff) Read(context->offset.zb, r, false);
|
||||
|
||||
s = format("c:\\temp1\\_%05d_f%lld_rt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->FRAME.Block(), m_context->FRAME.PSM);
|
||||
m_mem.SaveBMP(s, m_context->FRAME.Block(), m_context->FRAME.FBW, m_context->FRAME.PSM, GetFrameRect().width(), 512);
|
||||
|
||||
s = format("c:\\temp1\\_%05d_f%lld_zt1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), m_context->ZBUF.Block(), m_context->ZBUF.PSM);
|
||||
m_mem.SaveBMP(s, m_context->ZBUF.Block(), m_context->FRAME.FBW, m_context->ZBUF.PSM, GetFrameRect().width(), 512);
|
||||
|
||||
/*
|
||||
s = format("c:\\temp1\\_%05d_f%lld_fb1_%05x_%d.bmp", s_n, m_perfmon.GetFrame(), 0, 0);
|
||||
m_mem.SaveBMP(s, 0, 16, PSM_PSMCT32, 1024, 1024);
|
||||
*/
|
||||
|
||||
s_n++;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
GSOffset* off = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
Read(off, r, true); // TODO: fully overwritten pages are not needed to be read, only invalidated (important)
|
||||
|
||||
// TODO: false deps, 8H/4HL/4HH texture sharing pages with 24-bit target
|
||||
// TODO: invalidate texture cache
|
||||
}
|
||||
|
||||
void GSRendererCS::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
GSOffset* off = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||
|
||||
Read(off, r, false);
|
||||
}
|
||||
|
||||
// Uploads to the GPU buffer m_vm every page touched by rectangle `r` of
// `off` that is not yet marked valid in m_vm_valid, and marks it valid.
//
// m_vm_valid is a bitmap: word index = page >> 5, bit index = page & 31.
// The page list returned by GetPages() is terminated by GSOffset::EOP and is
// freed here with delete[].
void GSRendererCS::Write(GSOffset* off, const GSVector4i& r)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	box.right = 1;
	box.bottom = 1;
	box.back = 1;

	uint32* pages = off->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		const uint32 page = pages[i];

		const uint32 row = page >> 5;
		// Unsigned literal: shifting a signed 1 by 31 would overflow into the sign bit.
		const uint32 col = 1u << (page & 31);

		if((m_vm_valid[row] & col) == 0)
		{
			m_vm_valid[row] |= col;

			box.left = page * PAGE_SIZE;
			box.right = (page + 1) * PAGE_SIZE;

			ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + page * PAGE_SIZE, 0, 0);
			/*
			// m_vm texture row is 2k in bytes, one page is 8k => starting row: addr / 4k, number of rows: 8k / 2k = 4

			box.left = 0;
			box.right = PAGE_SIZE;
			box.top = page;
			box.bottom = box.top + 1;

			ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + page * PAGE_SIZE, 0, 0);
			*/
			if(0)
			printf("[%lld] write %05x %u %u (%u)\n", __rdtsc(), off->bp, off->bw, off->psm, page);
		}
	}

	delete [] pages;
}
|
||||
|
||||
// Copies back to local memory (m_mem.m_vm8) every page touched by rectangle
// `r` of `off` that is marked valid in m_vm_valid. Each page goes through the
// one-page staging buffer m_pb, which is then mapped for reading.
//
// When `invalidate` is true the page's valid bit is cleared, so a later
// Write() uploads it again.
//
// NOTE(review): the valid bit is cleared before the copy; if Map() fails the
// page is dropped without having been read back — confirm acceptable.
void GSRendererCS::Read(GSOffset* off, const GSVector4i& r, bool invalidate)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	box.right = 1;
	box.bottom = 1;
	box.back = 1;

	uint32* pages = off->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		const uint32 page = pages[i];

		const uint32 row = page >> 5;
		// Unsigned literal: shifting a signed 1 by 31 would overflow into the sign bit.
		const uint32 col = 1u << (page & 31);

		if(m_vm_valid[row] & col)
		{
			if(invalidate)
			{
				// The bit is known to be set here, so clearing it matches the
				// previous toggle.
				m_vm_valid[row] &= ~col;
			}

			box.left = page * PAGE_SIZE;
			box.right = (page + 1) * PAGE_SIZE;

			ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);
			/*
			// m_vm texture row is 2k in bytes, one page is 8k => starting row: addr / 4k, number of rows: 8k / 2k = 4

			box.left = 0;
			box.right = PAGE_SIZE;
			box.top = page;
			box.bottom = box.top + 1;

			ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);
			*/
			D3D11_MAPPED_SUBRESOURCE map;

			if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ, 0, &map)))
			{
				memcpy(m_mem.m_vm8 + page * PAGE_SIZE, map.pData, PAGE_SIZE);

				ctx->Unmap(m_pb, 0);

				if(0)
				printf("[%lld] read %05x %u %u (%u)\n", __rdtsc(), off->bp, off->bw, off->psm, page);
			}
		}
	}

	delete [] pages;
}
|
||||
|
||||
// Returns, through `*fzbo`, the cached row/column offset buffers for the
// current context's frame/z offset (keyed by m_context->offset.fzb->hash),
// creating and caching them on first use.
//
// Each buffer is immutable, holds 2048 GSVector2i entries and is exposed
// through an R32G32_SINT shader resource view.
//
// Returns false, leaving `*fzbo` untouched, when any buffer or view cannot
// be created.
bool GSRendererCS::GetOffsetBuffer(OffsetBuffer** fzbo)
{
	auto cached = m_offset.find(m_context->offset.fzb->hash);

	if(cached != m_offset.end())
	{
		*fzbo = &cached->second;

		return true;
	}

	GSDevice11* dev = (GSDevice11*)m_dev;

	OffsetBuffer ob;

	// Both buffers share one description; only the initial data differs.
	D3D11_BUFFER_DESC bd;

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(GSVector2i) * 2048;
	bd.Usage = D3D11_USAGE_IMMUTABLE;
	bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;

	D3D11_SUBRESOURCE_DATA data;

	memset(&data, 0, sizeof(data));

	data.pSysMem = m_context->offset.fzb->row;

	if(FAILED((*dev)->CreateBuffer(&bd, &data, &ob.row)))
	{
		return false;
	}

	data.pSysMem = m_context->offset.fzb->col;

	if(FAILED((*dev)->CreateBuffer(&bd, &data, &ob.col)))
	{
		return false;
	}

	// Both views share one description as well.
	D3D11_SHADER_RESOURCE_VIEW_DESC srvd;

	memset(&srvd, 0, sizeof(srvd));

	srvd.Format = DXGI_FORMAT_R32G32_SINT;
	srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
	srvd.Buffer.FirstElement = 0;
	srvd.Buffer.NumElements = 2048;

	if(FAILED((*dev)->CreateShaderResourceView(ob.row, &srvd, &ob.row_srv)))
	{
		return false;
	}

	if(FAILED((*dev)->CreateShaderResourceView(ob.col, &srvd, &ob.col_srv)))
	{
		return false;
	}

	m_offset[m_context->offset.fzb->hash] = ob;

	*fzbo = &m_offset.find(m_context->offset.fzb->hash)->second;

	return true;
}
|
|
@ -1,145 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSRenderer.h"
|
||||
#include "GSDevice11.h"
|
||||
|
||||
class GSRendererCS : public GSRenderer
|
||||
{
|
||||
struct VSSelector
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32 tme:1;
|
||||
uint32 fst:1;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
operator uint32() {return key & 0x3;}
|
||||
|
||||
VSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
struct alignas(32) VSConstantBuffer
|
||||
{
|
||||
GSVector4 VertexScale;
|
||||
GSVector4 VertexOffset;
|
||||
};
|
||||
|
||||
struct GSSelector
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32 iip:1;
|
||||
uint32 prim:2;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
operator uint32() {return key & 0x7;}
|
||||
|
||||
GSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
struct PSSelector
|
||||
{
|
||||
union
|
||||
{
|
||||
struct
|
||||
{
|
||||
uint32 fpsm:6;
|
||||
uint32 zpsm:6;
|
||||
};
|
||||
|
||||
uint32 key;
|
||||
};
|
||||
|
||||
operator uint32() {return key & 0x3ff;}
|
||||
|
||||
PSSelector() : key(0) {}
|
||||
};
|
||||
|
||||
struct alignas(32) PSConstantBuffer
|
||||
{
|
||||
uint32 fm;
|
||||
uint32 zm;
|
||||
};
|
||||
|
||||
CComPtr<ID3D11DepthStencilState> m_dss;
|
||||
CComPtr<ID3D11BlendState> m_bs;
|
||||
CComPtr<ID3D11SamplerState> m_ss;
|
||||
CComPtr<ID3D11Buffer> m_lb;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_lb_uav;
|
||||
CComPtr<ID3D11ShaderResourceView> m_lb_srv;
|
||||
CComPtr<ID3D11Buffer> m_sob;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_sob_uav;
|
||||
CComPtr<ID3D11ShaderResourceView> m_sob_srv;
|
||||
CComPtr<ID3D11Buffer> m_vm;
|
||||
//CComPtr<ID3D11Texture2D> m_vm;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_vm_uav;
|
||||
uint32 m_vm_valid[16];
|
||||
CComPtr<ID3D11Buffer> m_pb;
|
||||
//CComPtr<ID3D11Texture2D> m_pb;
|
||||
std::unordered_map<uint32, GSVertexShader11> m_vs;
|
||||
CComPtr<ID3D11Buffer> m_vs_cb;
|
||||
std::unordered_map<uint32, CComPtr<ID3D11GeometryShader>> m_gs;
|
||||
CComPtr<ID3D11PixelShader> m_ps0;
|
||||
std::unordered_map<uint64, CComPtr<ID3D11PixelShader>> m_ps1;
|
||||
CComPtr<ID3D11Buffer> m_ps_cb;
|
||||
|
||||
void Write(GSOffset* off, const GSVector4i& r);
|
||||
void Read(GSOffset* off, const GSVector4i& r, bool invalidate);
|
||||
|
||||
struct OffsetBuffer
|
||||
{
|
||||
CComPtr<ID3D11Buffer> row, col;
|
||||
CComPtr<ID3D11ShaderResourceView> row_srv, col_srv;
|
||||
};
|
||||
|
||||
std::unordered_map<uint32, OffsetBuffer> m_offset;
|
||||
|
||||
bool GetOffsetBuffer(OffsetBuffer** fzbo);
|
||||
|
||||
protected:
|
||||
GSTexture* m_texture[2];
|
||||
uint8* m_output;
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
void ResetDevice();
|
||||
void VSync(int field);
|
||||
GSTexture* GetOutput(int i, int& y_offset);
|
||||
void Draw();
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut);
|
||||
|
||||
public:
|
||||
GSRendererCS();
|
||||
virtual ~GSRendererCS();
|
||||
};
|
|
@ -53,7 +53,6 @@ BEGIN
|
|||
"#include ""res/interlace.fx""\r\n"
|
||||
"#include ""res/merge.fx""\r\n"
|
||||
"#include ""res/fxaa.fx""\r\n"
|
||||
"#include ""res/cs.fx""\r\n"
|
||||
"#include ""res/shadeboost.fx""\r\n"
|
||||
"#include ""res/tfx.cl""\r\n"
|
||||
"\0"
|
||||
|
@ -77,8 +76,6 @@ IDR_MERGE_FX RCDATA "res\\merge.fx"
|
|||
|
||||
IDR_FXAA_FX RCDATA "res\\fxaa.fx"
|
||||
|
||||
IDR_CS_FX RCDATA "res\\cs.fx"
|
||||
|
||||
IDR_SHADEBOOST_FX RCDATA "res\\shadeboost.fx"
|
||||
|
||||
IDR_TFX_CL RCDATA "res\\tfx.cl"
|
||||
|
@ -467,7 +464,6 @@ END
|
|||
#include "res/interlace.fx"
|
||||
#include "res/merge.fx"
|
||||
#include "res/fxaa.fx"
|
||||
#include "res/cs.fx"
|
||||
#include "res/shadeboost.fx"
|
||||
#include "res/tfx.cl"
|
||||
|
||||
|
|
|
@ -129,7 +129,6 @@
|
|||
<ClCompile Include="GSRasterizer.cpp" />
|
||||
<ClCompile Include="GSRenderer.cpp" />
|
||||
<ClCompile Include="GSRendererCL.cpp" />
|
||||
<ClCompile Include="GSRendererCS.cpp" />
|
||||
<ClCompile Include="GSRendererDX.cpp" />
|
||||
<ClCompile Include="GSRendererDX11.cpp" />
|
||||
<ClCompile Include="GSRendererDX9.cpp" />
|
||||
|
@ -220,7 +219,6 @@
|
|||
<ClInclude Include="GSRasterizer.h" />
|
||||
<ClInclude Include="GSRenderer.h" />
|
||||
<ClInclude Include="GSRendererCL.h" />
|
||||
<ClInclude Include="GSRendererCS.h" />
|
||||
<ClInclude Include="GSRendererDX.h" />
|
||||
<ClInclude Include="GSRendererDX11.h" />
|
||||
<ClInclude Include="GSRendererDX9.h" />
|
||||
|
@ -286,7 +284,6 @@
|
|||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="GSdx.def" />
|
||||
<None Include="res\cs.fx" />
|
||||
<None Include="res\fxaa.fx" />
|
||||
<None Include="res\logo10.bmp" />
|
||||
<None Include="res\logo9.bmp" />
|
||||
|
|
|
@ -240,9 +240,6 @@
|
|||
<ClCompile Include="GSDrawScanlineCodeGenerator.x86.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSRendererCS.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSDrawScanlineCodeGenerator.x86.avx2.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
|
@ -530,9 +527,6 @@
|
|||
<ClInclude Include="config.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GSRendererCS.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="targetver.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
|
@ -617,9 +611,6 @@
|
|||
<None Include="res\shadeboost.fx">
|
||||
<Filter>Shaders</Filter>
|
||||
</None>
|
||||
<None Include="res\cs.fx">
|
||||
<Filter>Shaders</Filter>
|
||||
</None>
|
||||
<None Include="res\tfx.cl">
|
||||
<Filter>Shaders</Filter>
|
||||
</None>
|
||||
|
|
|
@ -1,387 +0,0 @@
|
|||
#ifdef SHADER_MODEL // make safe to include in resource file to enforce dependency
|
||||
|
||||
// Fallback values for the compile-time switches, used only when the host
// does not define them before compiling this file.
// VS_TME / VS_FST select how vs_main fills the texture coordinate output.
#ifndef VS_TME
|
||||
#define VS_TME 1
|
||||
#define VS_FST 1
|
||||
#endif
|
||||
|
||||
// GS_IIP == 0 makes gs_main give every vertex the last vertex's color.
// GS_PRIM picks the gs_main variant: 0 point, 1 line, 2 triangle, 3 line expanded to a quad.
#ifndef GS_IIP
|
||||
#define GS_IIP 0
|
||||
#define GS_PRIM 2
|
||||
#endif
|
||||
|
||||
// PS_BATCH_SIZE is the capacity of the per-pixel fragment index array in ps_main1.
// PS_FPSM / PS_ZPSM are the formats passed to WritePixel for color and depth.
// The PSM_* names they expand to are defined further down; this is fine because
// macros are expanded at the point of use.
#ifndef PS_BATCH_SIZE
|
||||
#define PS_BATCH_SIZE 2048
|
||||
#define PS_FPSM PSM_PSMCT32
|
||||
#define PS_ZPSM PSM_PSMZ16
|
||||
#endif
|
||||
|
||||
// Pixel storage format codes, consumed by WritePixel's switch.
// NOTE(review): the values presumably mirror the GS PSM register encoding - confirm against the C++ definitions.
#define PSM_PSMCT32 0
|
||||
#define PSM_PSMCT24 1
|
||||
#define PSM_PSMCT16 2
|
||||
#define PSM_PSMCT16S 10
|
||||
#define PSM_PSMT8 19
|
||||
#define PSM_PSMT4 20
|
||||
#define PSM_PSMT8H 27
|
||||
#define PSM_PSMT4HL 36
|
||||
#define PSM_PSMT4HH 44
|
||||
#define PSM_PSMZ32 48
|
||||
#define PSM_PSMZ24 49
|
||||
#define PSM_PSMZ16 50
|
||||
#define PSM_PSMZ16S 58
|
||||
|
||||
// Per-vertex input consumed by vs_main.
struct VS_INPUT
|
||||
{
|
||||
// Texture coordinate used when VS_FST is 0 (paired with q).
float2 st : TEXCOORD0;
|
||||
// Vertex color, passed through unchanged.
float4 c : COLOR0;
|
||||
// Written to the output t.w when VS_FST is 0.
float q : TEXCOORD1;
|
||||
// Integer position, scaled and offset by the VS constant buffer.
uint2 p : POSITION0;
|
||||
// 32-bit depth; vs_main splits it into low and high 16-bit halves.
uint z : POSITION1;
|
||||
// Integer texture coordinate used when VS_FST is 1.
uint2 uv : TEXCOORD2;
|
||||
// Only the red channel is read (copied to the output t.z).
float4 f : COLOR1;
|
||||
};
|
||||
|
||||
// Output of vs_main and input of the gs_main variants.
struct VS_OUTPUT
|
||||
{
|
||||
float4 p : SV_Position;
|
||||
// Depth split in two: x = low 16 bits, y = high 16 bits.
float2 z : TEXCOORD0;
|
||||
// xy = texture coordinate, z = input.f.r, w = q (or 1).
float4 t : TEXCOORD1;
|
||||
float4 c : COLOR0;
|
||||
};
|
||||
|
||||
// Output of gs_main and input of the pixel shaders: VS_OUTPUT plus the primitive id.
struct GS_OUTPUT
|
||||
{
|
||||
float4 p : SV_Position;
|
||||
float2 z : TEXCOORD0;
|
||||
float4 t : TEXCOORD1;
|
||||
float4 c : COLOR0;
|
||||
// Primitive id; stored per fragment by ps_main0 and used as the sort key in ps_main1.
uint id : SV_PrimitiveID;
|
||||
};
|
||||
|
||||
// Vertex shader constants: vs_main computes position * VertexScale - VertexOffset.
cbuffer VSConstantBuffer : register(c0)
|
||||
{
|
||||
float4 VertexScale;
|
||||
float4 VertexOffset;
|
||||
};
|
||||
|
||||
// Pixel shader constants. WriteMask is only referenced from commented-out
// expressions in ps_main1 (x with the color value, y with the depth value).
cbuffer PSConstantBuffer : register(c0)
|
||||
{
|
||||
uint2 WriteMask;
|
||||
};
|
||||
|
||||
// One node of a per-pixel fragment list: packed color, depth, primitive id
// and the index of the next node (0 terminates the list).
struct FragmentLinkItem
|
||||
{
|
||||
uint c, z, id, next;
|
||||
};
|
||||
|
||||
// Byte-addressed memory that ReadPixel / WritePixel operate on.
RWByteAddressBuffer VideoMemory : register(u0);
|
||||
// Pool of fragment list nodes; ps_main0 allocates from it with IncrementCounter().
RWStructuredBuffer<FragmentLinkItem> FragmentLinkBuffer : register(u1);
|
||||
// One uint per pixel at byte offset ((y << 11) + x) * 4: index of the list head.
RWByteAddressBuffer StartOffsetBuffer : register(u2);
|
||||
//RWTexture2D<uint> VideoMemory : register(u2); // 8192 * 512 R8_UINT
|
||||
|
||||
// Per-row and per-column address offsets; ps_main1 adds them and uses
// .x for the color address and .y for the depth address.
Buffer<int2> FZRowOffset : register(t0);
|
||||
Buffer<int2> FZColOffset : register(t1);
|
||||
// Declared but not referenced by the shader code in this file.
Texture2D<float4> Palette : register(t2);
|
||||
Texture2D<float4> Texture : register(t3);
|
||||
|
||||
// Vertex shader: transforms the integer position, splits depth into two
// 16-bit halves and selects the texture coordinate source from VS_TME / VS_FST.
VS_OUTPUT vs_main(VS_INPUT input)
{
	// Values used when texturing is disabled (VS_TME == 0).
	float2 coord = 0;
	float divisor = 1.0f;

	if(VS_TME)
	{
		if(VS_FST)
		{
			// Integer coordinates, no perspective term.
			coord = input.uv;
		}
		else
		{
			coord = input.st;
			divisor = input.q;
		}
	}

	VS_OUTPUT result;

	result.p = float4(input.p, 0.0f, 0.0f) * VertexScale - VertexOffset;
	result.z = float2(input.z & 0xffff, input.z >> 16); // TODO: min(input.z, 0xffffff00) ?
	result.t = float4(coord, input.f.r, divisor);
	result.c = input.c;

	return result;
}
|
||||
|
||||
#if GS_PRIM == 0
|
||||
|
||||
// Point variant (GS_PRIM == 0): forwards the single vertex and tags it with the primitive id.
[maxvertexcount(1)]
void gs_main(point VS_OUTPUT input[1], inout PointStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	GS_OUTPUT vertex;

	vertex.id = id;
	vertex.c = input[0].c;
	vertex.t = input[0].t;
	vertex.z = input[0].z;
	vertex.p = input[0].p;

	stream.Append(vertex);
}
|
||||
|
||||
#elif GS_PRIM == 1
|
||||
|
||||
// Line variant (GS_PRIM == 1): forwards both vertices tagged with the primitive id.
[maxvertexcount(2)]
void gs_main(line VS_OUTPUT input[2], inout LineStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	[unroll]
	for(int v = 0; v < 2; v++)
	{
		GS_OUTPUT vertex;

		vertex.p = input[v].p;
		vertex.z = input[v].z;
		vertex.t = input[v].t;
		vertex.id = id;

		#if GS_IIP == 0
		// Flat shading: every vertex takes the color of the last vertex.
		vertex.c = input[1].c;
		#else
		vertex.c = input[v].c;
		#endif

		stream.Append(vertex);
	}
}
|
||||
|
||||
#elif GS_PRIM == 2
|
||||
|
||||
// Triangle variant (GS_PRIM == 2): forwards the three vertices tagged with the primitive id.
[maxvertexcount(3)]
void gs_main(triangle VS_OUTPUT input[3], inout TriangleStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	[unroll]
	for(int v = 0; v < 3; v++)
	{
		GS_OUTPUT vertex;

		vertex.p = input[v].p;
		vertex.z = input[v].z;
		vertex.t = input[v].t;
		vertex.id = id;

		#if GS_IIP == 0
		// Flat shading: every vertex takes the color of the last vertex.
		vertex.c = input[2].c;
		#else
		vertex.c = input[v].c;
		#endif

		stream.Append(vertex);
	}
}
|
||||
|
||||
#elif GS_PRIM == 3
|
||||
|
||||
// Sprite variant (GS_PRIM == 3): expands a two-vertex line into a four-vertex
// triangle strip whose corners are spanned by the two input vertices.
[maxvertexcount(4)]
void gs_main(line VS_OUTPUT input[2], inout TriangleStream<GS_OUTPUT> stream, uint id : SV_PrimitiveID)
{
	// Second corner: a straight copy of the second input vertex.
	GS_OUTPUT cornerRB;

	cornerRB.p = input[1].p;
	cornerRB.z = input[1].z;
	cornerRB.t = input[1].t;
	cornerRB.c = input[1].c;
	cornerRB.id = id;

	// First corner: position and texture xy from the first vertex,
	// depth and texture zw from the second one.
	GS_OUTPUT cornerLT;

	cornerLT.p = input[0].p;
	cornerLT.z = input[1].z;
	cornerLT.t = float4(input[0].t.xy, input[1].t.zw);
	cornerLT.id = id;

	#if GS_IIP == 0
	cornerLT.c = input[1].c;
	#else
	cornerLT.c = input[0].c;
	#endif

	// The remaining corners swap the y components of position and texture coordinate.
	GS_OUTPUT cornerLB = cornerLT;

	cornerLB.p.y = cornerRB.p.y;
	cornerLB.t.y = cornerRB.t.y;

	GS_OUTPUT cornerRT = cornerRB;

	cornerRT.p.y = cornerLT.p.y;
	cornerRT.t.y = cornerLT.t.y;

	stream.Append(cornerLT);
	stream.Append(cornerLB);
	stream.Append(cornerRT);
	stream.Append(cornerRB);
}
|
||||
|
||||
#endif
|
||||
|
||||
// Packs a float4 color into one uint: each channel is scaled by 255 and
// truncated, with r in bits 0-7, g in 8-15, b in 16-23 and a in 24-31.
uint CompressColor32(float4 f)
{
	uint4 channels = (uint4)(f * 0xff);

	return channels.r | (channels.g << 8) | (channels.b << 16) | (channels.a << 24);
}
|
||||
|
||||
// Expands a 16-bit 5:5:5:1 pixel into the 32-bit layout produced by
// CompressColor32 (r in bits 0-7, g in 8-15, b in 16-23, a in 24-31).
// Each 5-bit channel lands in the top five bits of its byte and the single
// alpha bit becomes the top bit of the alpha byte.
uint DecompressColor16(uint c)
{
	uint r = (c & 0x001f) << 3;  // bits 0-4   -> bits 3-7
	uint g = (c & 0x03e0) << 6;  // bits 5-9   -> bits 11-15
	uint b = (c & 0x7c00) << 9;  // bits 10-14 -> bits 19-23
	uint a = (c & 0x8000) << 16; // bit 15     -> bit 31 (a shift of 15 would land on bit 30)

	return r | g | b | a;
}
|
||||
|
||||
// Loads a word from VideoMemory at addr and shifts it right by 16 when
// bit 1 of addr is set. The result is not masked.
uint ReadPixel(uint addr)
{
	uint shift = (addr & 2) << 3;
	uint word = VideoMemory.Load(addr);

	return word >> shift;
}
|
||||
|
||||
// Stores a pixel into VideoMemory according to its storage format.
// 32/24-bit formats overwrite the whole word; 16-bit formats replace only the
// half-word selected by bit 1 of addr. Other formats are ignored.
void WritePixel(uint addr, uint value, uint psm)
{
	bool wholeWord = psm == PSM_PSMCT32 || psm == PSM_PSMZ32 || psm == PSM_PSMCT24 || psm == PSM_PSMZ24;
	bool halfWord = psm == PSM_PSMCT16 || psm == PSM_PSMCT16S || psm == PSM_PSMZ16 || psm == PSM_PSMZ16S;

	if(wholeWord)
	{
		VideoMemory.Store(addr, value);
	}
	else if(halfWord)
	{
		uint shift = (addr & 2) << 3;
		uint current = VideoMemory.Load(addr);

		// XOR-ing the word with (new ^ old), restricted to the target lane,
		// replaces that lane and leaves the other half untouched.
		uint delta = ((value << shift) ^ current) & (0x0000ffff << shift);
		uint previous;

		VideoMemory.InterlockedXor(addr, delta, previous);
	}
}
|
||||
|
||||
// First pixel pass: allocates a node in FragmentLinkBuffer for this fragment
// and pushes it onto the front of the pixel's linked list.
void ps_main0(GS_OUTPUT input)
{
	uint2 pixel = uint2((uint)input.p.x, (uint)input.p.y);

	// Reserve a slot for the new node.
	uint slot = FragmentLinkBuffer.IncrementCounter();

	// Make the new node the list head and remember the old head.
	uint head = (pixel.y << 11) + pixel.x;
	uint previous = 0;

	StartOffsetBuffer.InterlockedExchange(head * 4, slot, previous);

	// TODO: preprocess color (tfx, alpha test), z-test

	FragmentLinkItem node;

	node.c = CompressColor32(input.c);
	node.z = (uint)(input.z.y * 0x10000 + input.z.x);
	node.id = input.id;
	node.next = previous;

	FragmentLinkBuffer[slot] = node;
}
|
||||
|
||||
// Second pixel pass: resolves the fragment list that ps_main0 built for this pixel.
//
// 1. Detach the pixel's list from StartOffsetBuffer and copy the node indices
//    into a local array (at most PS_BATCH_SIZE entries).
// 2. Pad the array to a power-of-two length and sort it by primitive id with a
//    bitonic sorting network.
// 3. Walk the collected fragments, keeping the one with the largest depth.
// 4. Write the resulting color (and, for now, a zero depth) to VideoMemory.
//
// The sorting network only stays inside the array when PS_BATCH_SIZE is a power of two.
void ps_main1(GS_OUTPUT input)
{
	uint2 pos = (uint2)input.p.xy;

	// sort fragments

	uint StartOffsetIndex = (pos.y << 11) + pos.x;

	int index[PS_BATCH_SIZE];
	int count = 0;

	uint next = StartOffsetBuffer.Load(StartOffsetIndex * 4);

	// Reset the head so the next batch starts with an empty list.
	StartOffsetBuffer.Store(StartOffsetIndex * 4, 0);

	// Stop at the array capacity: fragments beyond PS_BATCH_SIZE are dropped
	// instead of being written past the end of index[].
	[allow_uav_condition]
	while(next != 0 && count < PS_BATCH_SIZE)
	{
		index[count++] = next;

		next = FragmentLinkBuffer[next].next;
	}

	// Next power of two >= count. log2(0) is not meaningful, so lists with
	// zero or one entry use a length of 1 directly.
	int N2 = count > 1 ? (1 << (int)(ceil(log2(count)))) : 1;

	// Pad with node 0, the list terminator.
	[allow_uav_condition]
	for(int i = count; i < N2; i++)
	{
		index[i] = 0;
	}

	[allow_uav_condition]
	for(int k = 2; k <= N2; k = 2 * k)
	{
		[allow_uav_condition]
		for(int j = k >> 1; j > 0 ; j = j >> 1)
		{
			[allow_uav_condition]
			for(int i = 0; i < N2; i++)
			{
				uint i_id = FragmentLinkBuffer[index[i]].id;

				int ixj = i ^ j;

				if(ixj > i)
				{
					uint ixj_id = FragmentLinkBuffer[index[ixj]].id;

					// Ascending block: the smaller id goes first.
					if((i & k) == 0 && i_id > ixj_id)
					{
						int temp = index[i];
						index[i] = index[ixj];
						index[ixj] = temp;
					}

					// Descending block: the larger id goes first.
					if((i & k) != 0 && i_id < ixj_id)
					{
						int temp = index[i];
						index[i] = index[ixj];
						index[ixj] = temp;
					}
				}
			}
		}
	}

	// .x addresses the color pixel, .y the depth pixel.
	uint2 addr = (uint2)(FZRowOffset[pos.y] + FZColOffset[pos.x]) << 1;

	uint dc = ReadPixel(addr.x);
	uint dz = ReadPixel(addr.y);

	uint sc = dc;
	uint sz = dz;

	[allow_uav_condition]
	while(--count >= 0)
	{
		FragmentLinkItem f = FragmentLinkBuffer[index[count]];

		// TODO

		// Keep the fragment with the greatest depth value seen so far.
		if(sz < f.z)
		{
			sc = f.c;
			sz = f.z;
		}
	}

	uint c = sc; // (dc & ~WriteMask.x) | (sc & WriteMask.x);
	uint z = 0;//sz; //(dz & ~WriteMask.y) | (sz & WriteMask.y);

	WritePixel(addr.x, c, PS_FPSM);
	WritePixel(addr.y, z, PS_ZPSM);
}
|
||||
|
||||
#endif
|
|
@ -149,7 +149,6 @@
|
|||
#define IDR_MERGE_FX 10002
|
||||
#define IDR_INTERLACE_FX 10003
|
||||
#define IDR_FXAA_FX 10004
|
||||
#define IDR_CS_FX 10005
|
||||
#define IDD_SHADER 10006
|
||||
#define IDR_SHADEBOOST_FX 10007
|
||||
#define IDR_TFX_CL 10008
|
||||
|
|
Loading…
Reference in New Issue