mirror of https://github.com/PCSX2/pcsx2.git
GSdx: nothing really new, just testing the compute shader, if you are an expert take a look and tell me your opinion :P
git-svn-id: http://pcsx2.googlecode.com/svn/trunk@5068 96395faa-99c1-11dd-bbfe-3dabce05a288
This commit is contained in:
parent
481f1fdda2
commit
da4ea83134
|
@ -33,6 +33,7 @@
|
|||
#include "GSRendererDX11.h"
|
||||
#include "GSDevice9.h"
|
||||
#include "GSDevice11.h"
|
||||
#include "GSRendererCS.h"
|
||||
#include "GSSettingsDlg.h"
|
||||
|
||||
static HRESULT s_hr = E_FAIL;
|
||||
|
@ -206,41 +207,64 @@ static int _GSopen(void** dsp, char* title, int renderer, int threads = -1)
|
|||
s_gs = NULL;
|
||||
}
|
||||
|
||||
switch(renderer / 3)
|
||||
if(renderer == 12)
|
||||
{
|
||||
default:
|
||||
#ifdef _WINDOWS
|
||||
case 0: dev = new GSDevice9(); break;
|
||||
case 1: dev = new GSDevice11(); break;
|
||||
#endif
|
||||
case 2: dev = new GSDeviceSDL(); break;
|
||||
case 3: dev = new GSDeviceNull(); break;
|
||||
}
|
||||
#ifdef _WINDOWS
|
||||
|
||||
dev = new GSDevice11();
|
||||
|
||||
if(dev == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
if(dev == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(s_gs == NULL)
|
||||
if(s_gs == NULL)
|
||||
{
|
||||
s_gs = new GSRendererCS();
|
||||
|
||||
s_renderer = renderer;
|
||||
}
|
||||
|
||||
#endif
|
||||
}
|
||||
else
|
||||
{
|
||||
switch(renderer % 3)
|
||||
switch(renderer / 3)
|
||||
{
|
||||
default:
|
||||
#ifdef _WINDOWS
|
||||
case 0:
|
||||
s_gs = (renderer / 3) == 0 ? (GSRenderer*)new GSRendererDX9() : (GSRenderer*)new GSRendererDX11();
|
||||
break;
|
||||
case 0: dev = new GSDevice9(); break;
|
||||
case 1: dev = new GSDevice11(); break;
|
||||
#endif
|
||||
case 1:
|
||||
s_gs = new GSRendererSW(threads);
|
||||
break;
|
||||
case 2:
|
||||
s_gs = new GSRendererNull();
|
||||
break;
|
||||
case 2: dev = new GSDeviceSDL(); break;
|
||||
case 3: dev = new GSDeviceNull(); break;
|
||||
}
|
||||
|
||||
s_renderer = renderer;
|
||||
if(dev == NULL)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(s_gs == NULL)
|
||||
{
|
||||
switch(renderer % 3)
|
||||
{
|
||||
default:
|
||||
#ifdef _WINDOWS
|
||||
case 0:
|
||||
s_gs = (renderer / 3) == 0 ? (GSRenderer*)new GSRendererDX9() : (GSRenderer*)new GSRendererDX11();
|
||||
break;
|
||||
#endif
|
||||
case 1:
|
||||
s_gs = new GSRendererSW(threads);
|
||||
break;
|
||||
case 2:
|
||||
s_gs = new GSRendererNull();
|
||||
break;
|
||||
}
|
||||
|
||||
s_renderer = renderer;
|
||||
}
|
||||
}
|
||||
}
|
||||
catch(std::exception& ex)
|
||||
|
|
|
@ -28,8 +28,14 @@
|
|||
|
||||
#define PLUGIN_VERSION 16
|
||||
|
||||
#define MAX_PAGES 512
|
||||
#define MAX_BLOCKS 16384
|
||||
#define VM_SIZE 4194304
|
||||
#define PAGE_SIZE 8192
|
||||
#define BLOCK_SIZE 256
|
||||
#define COLUMN_SIZE 64
|
||||
|
||||
#define MAX_PAGES (VM_SIZE / PAGE_SIZE)
|
||||
#define MAX_BLOCKS (VM_SIZE / BLOCK_SIZE)
|
||||
#define MAX_COLUMNS (VM_SIZE / COLUMN_SIZE)
|
||||
|
||||
//if defined, will send much info in reply to the API title info queri from PCSX2
|
||||
//default should be undefined
|
||||
|
|
|
@ -144,7 +144,7 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
|
||||
for(int i = 0; i < countof(m_convert.ps); i++)
|
||||
{
|
||||
hr = CompileShader(IDR_CONVERT_FX, format("ps_main%d", i), NULL, &m_convert.ps[i]);
|
||||
hr = CompileShader(IDR_CONVERT_FX, format("ps_main%d", i).c_str(), NULL, &m_convert.ps[i]);
|
||||
}
|
||||
|
||||
memset(&dsd, 0, sizeof(dsd));
|
||||
|
@ -172,7 +172,7 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
|
||||
for(int i = 0; i < countof(m_merge.ps); i++)
|
||||
{
|
||||
hr = CompileShader(IDR_MERGE_FX, format("ps_main%d", i), NULL, &m_merge.ps[i]);
|
||||
hr = CompileShader(IDR_MERGE_FX, format("ps_main%d", i).c_str(), NULL, &m_merge.ps[i]);
|
||||
}
|
||||
|
||||
memset(&bsd, 0, sizeof(bsd));
|
||||
|
@ -200,7 +200,7 @@ bool GSDevice11::Create(GSWnd* wnd)
|
|||
|
||||
for(int i = 0; i < countof(m_interlace.ps); i++)
|
||||
{
|
||||
hr = CompileShader(IDR_INTERLACE_FX, format("ps_main%d", i), NULL, &m_interlace.ps[i]);
|
||||
hr = CompileShader(IDR_INTERLACE_FX, format("ps_main%d", i).c_str(), NULL, &m_interlace.ps[i]);
|
||||
}
|
||||
|
||||
// fxaa
|
||||
|
@ -360,6 +360,11 @@ void GSDevice11::DrawIndexedPrimitive()
|
|||
m_ctx->DrawIndexed(m_index.count, m_index.start, m_vertex.start);
|
||||
}
|
||||
|
||||
// Forwards the three thread-group counts unchanged to the device context's
// Dispatch() call.
void GSDevice11::Dispatch(uint32 x, uint32 y, uint32 z)
{
	m_ctx->Dispatch(x, y, z);
}
|
||||
|
||||
void GSDevice11::ClearRenderTarget(GSTexture* t, const GSVector4& c)
|
||||
{
|
||||
m_ctx->ClearRenderTargetView(*(GSTexture11*)t, c.v);
|
||||
|
@ -937,7 +942,7 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
m_ctx->PSSetShader(ps, NULL, 0);
|
||||
}
|
||||
|
||||
if (m_srv_changed)
|
||||
if(m_srv_changed)
|
||||
{
|
||||
m_ctx->PSSetShaderResources(0, 3, m_state.ps_srv);
|
||||
|
||||
|
@ -959,6 +964,38 @@ void GSDevice11::PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb)
|
|||
}
|
||||
}
|
||||
|
||||
void GSDevice11::CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv)
|
||||
{
|
||||
// TODO: if(m_state.cs_srv[i] != srv)
|
||||
{
|
||||
// TODO: m_state.cs_srv[i] = srv;
|
||||
|
||||
m_ctx->CSSetShaderResources(i, 1, &srv);
|
||||
}
|
||||
}
|
||||
|
||||
void GSDevice11::CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav)
|
||||
{
|
||||
// TODO: if(m_state.cs_uav[i] != uav)
|
||||
{
|
||||
// TODO: m_state.cs_uav[i] = uav;
|
||||
|
||||
// uint32 count[] = {-1};
|
||||
|
||||
m_ctx->CSSetUnorderedAccessViews(i, 1, &uav, NULL);
|
||||
}
|
||||
}
|
||||
|
||||
// Makes cs the active compute shader.
//
// The last shader set through this function is remembered in m_state.cs, and
// the context is only touched when a different pointer is requested.
void GSDevice11::CSSetShader(ID3D11ComputeShader* cs)
{
	if(m_state.cs == cs)
	{
		return; // already bound, nothing to do
	}

	m_state.cs = cs;

	m_ctx->CSSetShader(cs, NULL, 0);
}
|
||||
|
||||
void GSDevice11::OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref)
|
||||
{
|
||||
if(m_state.dss != dss || m_state.sref != sref)
|
||||
|
@ -1027,7 +1064,7 @@ void GSDevice11::OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector
|
|||
}
|
||||
}
|
||||
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il)
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -1037,7 +1074,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
|
|||
|
||||
CComPtr<ID3D11Blob> shader, error;
|
||||
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.vs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
|
||||
if(error)
|
||||
{
|
||||
|
@ -1066,7 +1103,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
|
|||
return hr;
|
||||
}
|
||||
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs)
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -1076,7 +1113,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
|
|||
|
||||
CComPtr<ID3D11Blob> shader, error;
|
||||
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.gs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
|
||||
if(error)
|
||||
{
|
||||
|
@ -1098,7 +1135,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
|
|||
return hr;
|
||||
}
|
||||
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps)
|
||||
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
|
@ -1108,7 +1145,7 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
|
|||
|
||||
CComPtr<ID3D11Blob> shader, error;
|
||||
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry.c_str(), m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.ps.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
|
||||
if(error)
|
||||
{
|
||||
|
@ -1120,7 +1157,71 @@ HRESULT GSDevice11::CompileShader(uint32 id, const string& entry, D3D11_SHADER_M
|
|||
return hr;
|
||||
}
|
||||
|
||||
hr = m_dev->CreatePixelShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(),NULL, ps);
|
||||
hr = m_dev->CreatePixelShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(),NULL, ps);
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
return hr;
|
||||
}
|
||||
|
||||
return hr;
|
||||
}
|
||||
|
||||
// Compiles the entry point `entry` of the shader stored in resource `id` as a
// compute shader and creates the shader object.
//
// id    - resource identifier of the shader source inside this module
// entry - name of the entry-point function
// macro - optional extra macro list, merged by PrepareShaderMacro
// cs    - receives the created compute shader on success
//
// Returns the HRESULT of the compile step if it failed, otherwise the HRESULT
// of CreateComputeShader. Compiler diagnostics, when present, are printed to
// stdout whether or not the compile succeeded.
HRESULT GSDevice11::CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
{
	vector<D3D11_SHADER_MACRO> m;

	PrepareShaderMacro(m, macro);

	CComPtr<ID3D11Blob> shader, error;

	// A compute shader has to be compiled against the compute profile (m_shader.cs),
	// exactly like the file-based overload does; the pixel-shader profile was used
	// here by mistake.

	HRESULT hr = D3DX11CompileFromResource(theApp.GetModuleHandle(), MAKEINTRESOURCE(id), NULL, &m[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);

	if(error)
	{
		printf("%s\n", (const char*)error->GetBufferPointer());
	}

	if(FAILED(hr))
	{
		return hr;
	}

	return m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(), NULL, cs);
}
|
||||
|
||||
HRESULT GSDevice11::CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs)
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
vector<D3D11_SHADER_MACRO> m;
|
||||
|
||||
PrepareShaderMacro(m, macro);
|
||||
|
||||
CComPtr<ID3D11Blob> shader, error;
|
||||
|
||||
hr = D3DX11CompileFromFile(fn, &m[0], NULL, entry, m_shader.cs.c_str(), 0, 0, NULL, &shader, &error, NULL);
|
||||
|
||||
if(error)
|
||||
{
|
||||
printf("%s\n", (const char*)error->GetBufferPointer());
|
||||
}
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
return hr;
|
||||
}
|
||||
|
||||
hr = m_dev->CreateComputeShader((void*)shader->GetBufferPointer(), shader->GetBufferSize(),NULL, cs);
|
||||
|
||||
if(FAILED(hr))
|
||||
{
|
||||
|
|
|
@ -64,6 +64,7 @@ class GSDevice11 : public GSDeviceDX
|
|||
ID3D11PixelShader* ps;
|
||||
ID3D11Buffer* ps_cb;
|
||||
ID3D11SamplerState* ps_ss[3];
|
||||
ID3D11ComputeShader* cs;
|
||||
GSVector2i viewport;
|
||||
GSVector4i scissor;
|
||||
ID3D11DepthStencilState* dss;
|
||||
|
@ -145,6 +146,7 @@ public:
|
|||
|
||||
void DrawPrimitive();
|
||||
void DrawIndexedPrimitive();
|
||||
void Dispatch(uint32 x, uint32 y, uint32 z);
|
||||
|
||||
void ClearRenderTarget(GSTexture* t, const GSVector4& c);
|
||||
void ClearRenderTarget(GSTexture* t, uint32 c);
|
||||
|
@ -178,6 +180,9 @@ public:
|
|||
void PSSetShaderResource(int i, GSTexture* sr);
|
||||
void PSSetShader(ID3D11PixelShader* ps, ID3D11Buffer* ps_cb);
|
||||
void PSSetSamplerState(ID3D11SamplerState* ss0, ID3D11SamplerState* ss1, ID3D11SamplerState* ss2 = NULL);
|
||||
void CSSetShaderSRV(int i, ID3D11ShaderResourceView* srv);
|
||||
void CSSetShaderUAV(int i, ID3D11UnorderedAccessView* uav);
|
||||
void CSSetShader(ID3D11ComputeShader* cs);
|
||||
void OMSetDepthStencilState(ID3D11DepthStencilState* dss, uint8 sref);
|
||||
void OMSetBlendState(ID3D11BlendState* bs, float bf);
|
||||
void OMSetRenderTargets(GSTexture* rt, GSTexture* ds, const GSVector4i* scissor = NULL);
|
||||
|
@ -195,8 +200,10 @@ public:
|
|||
operator ID3D11Device*() {return m_dev;}
|
||||
operator ID3D11DeviceContext*() {return m_ctx;}
|
||||
|
||||
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
|
||||
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
|
||||
HRESULT CompileShader(uint32 id, const string& entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11VertexShader** vs, D3D11_INPUT_ELEMENT_DESC* layout, int count, ID3D11InputLayout** il);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11GeometryShader** gs);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11PixelShader** ps);
|
||||
HRESULT CompileShader(uint32 id, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
|
||||
HRESULT CompileShader(const char* fn, const char* entry, D3D11_SHADER_MACRO* macro, ID3D11ComputeShader** cs);
|
||||
};
|
||||
|
||||
|
|
|
@ -67,18 +67,21 @@ bool GSDeviceDX::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
|
|||
m_shader.vs = "vs_4_0";
|
||||
m_shader.gs = "gs_4_0";
|
||||
m_shader.ps = "ps_4_0";
|
||||
m_shader.cs = "cs_4_0";
|
||||
break;
|
||||
case D3D_FEATURE_LEVEL_10_1:
|
||||
m_shader.model = "0x401";
|
||||
m_shader.vs = "vs_4_1";
|
||||
m_shader.gs = "gs_4_1";
|
||||
m_shader.ps = "ps_4_1";
|
||||
m_shader.cs = "cs_4_1";
|
||||
break;
|
||||
case D3D_FEATURE_LEVEL_11_0:
|
||||
m_shader.model = "0x500";
|
||||
m_shader.vs = "vs_5_0";
|
||||
m_shader.gs = "gs_5_0";
|
||||
m_shader.ps = "ps_5_0";
|
||||
m_shader.cs = "cs_5_0";
|
||||
break;
|
||||
default:
|
||||
ASSERT(0);
|
||||
|
|
|
@ -266,7 +266,7 @@ public:
|
|||
#pragma pack(pop)
|
||||
|
||||
protected:
|
||||
struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps;} m_shader;
|
||||
struct {D3D_FEATURE_LEVEL level; string model, vs, gs, ps, cs;} m_shader;
|
||||
uint32 m_msaa;
|
||||
DXGI_SAMPLE_DESC m_msaa_desc;
|
||||
|
||||
|
@ -277,6 +277,7 @@ public:
|
|||
virtual ~GSDeviceDX();
|
||||
|
||||
bool SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode);
|
||||
void GetFeatureLevel(D3D_FEATURE_LEVEL& level) const {level = m_shader.level;}
|
||||
|
||||
virtual void SetupIA(const void* vertex, int vertex_count, const uint32* index, int index_count, int prim) = 0;
|
||||
virtual void SetupVS(VSSelector sel, const VSConstantBuffer* cb) = 0;
|
||||
|
|
|
@ -500,6 +500,11 @@ GSPixelOffset4* GSLocalMemory::GetPixelOffset4(const GIFRegFRAME& FRAME, const G
|
|||
GSPixelOffset4* o = (GSPixelOffset4*)_aligned_malloc(sizeof(GSPixelOffset4), 32);
|
||||
|
||||
o->hash = hash;
|
||||
o->fbp = fbp;
|
||||
o->zbp = zbp;
|
||||
o->fpsm = fpsm;
|
||||
o->zpsm = zpsm;
|
||||
o->bw = bw;
|
||||
|
||||
pixelAddress fpa = m_psm[fpsm].pa;
|
||||
pixelAddress zpa = m_psm[zpsm].pa;
|
||||
|
|
|
@ -63,6 +63,7 @@ struct GSPixelOffset4
|
|||
GSVector2i row[2048]; // f yn | z yn (n = 0 1 2 ...)
|
||||
GSVector2i col[512]; // f xn | z xn (n = 0 4 8 ...)
|
||||
uint32 hash;
|
||||
uint32 fbp, zbp, fpsm, zpsm, bw;
|
||||
};
|
||||
|
||||
class GSLocalMemory : public GSBlock
|
||||
|
|
|
@ -64,11 +64,15 @@ GSRasterizer::~GSRasterizer()
|
|||
|
||||
bool GSRasterizer::IsOneOfMyScanlines(int top) const
|
||||
{
|
||||
ASSERT(top >= 0 && top < 2048);
|
||||
|
||||
return m_myscanline[top >> THREAD_HEIGHT] != 0;
|
||||
}
|
||||
|
||||
bool GSRasterizer::IsOneOfMyScanlines(int top, int bottom) const
|
||||
{
|
||||
ASSERT(top >= 0 && top < 2048 && bottom >= 0 && bottom < 2048);
|
||||
|
||||
top = top >> THREAD_HEIGHT;
|
||||
bottom = (bottom + (1 << THREAD_HEIGHT) - 1) >> THREAD_HEIGHT;
|
||||
|
||||
|
@ -187,12 +191,12 @@ void GSRasterizer::Draw(GSRasterizerData* data)
|
|||
|
||||
if(index != NULL)
|
||||
{
|
||||
do {DrawSprite(vertex, index, data->solidrect); index += 2;}
|
||||
do {DrawSprite(vertex, index); index += 2;}
|
||||
while(index < index_end);
|
||||
}
|
||||
else
|
||||
{
|
||||
do {DrawSprite(vertex, tmp_index, data->solidrect); vertex += 2;}
|
||||
do {DrawSprite(vertex, tmp_index); vertex += 2;}
|
||||
while(vertex < vertex_end);
|
||||
}
|
||||
|
||||
|
@ -407,7 +411,7 @@ void GSRasterizer::DrawTriangle(const GSVertexSW* vertex, const uint32* index)
|
|||
GSVector4 tbf = y0011.xzxz(y1221).ceil();
|
||||
GSVector4 tbmax = tbf.max(m_fscissor_y);
|
||||
GSVector4 tbmin = tbf.min(m_fscissor_y);
|
||||
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin));
|
||||
GSVector4i tb = GSVector4i(tbmax.xzyw(tbmin)); // max(y0, t) max(y1, t) min(y1, b) min(y2, b)
|
||||
|
||||
dv[0] = v1 - v0;
|
||||
dv[1] = v2 - v0;
|
||||
|
@ -565,7 +569,7 @@ void GSRasterizer::DrawTriangleSection(int top, int bottom, GSVertexSW& edge, co
|
|||
m_edge.count += e - &m_edge.buff[m_edge.count];
|
||||
}
|
||||
|
||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect)
|
||||
void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index)
|
||||
{
|
||||
const GSVertexSW& v0 = vertex[index[0]];
|
||||
const GSVertexSW& v1 = vertex[index[1]];
|
||||
|
@ -589,7 +593,7 @@ void GSRasterizer::DrawSprite(const GSVertexSW* vertex, const uint32* index, boo
|
|||
|
||||
GSVertexSW scan = v[0];
|
||||
|
||||
if(solidrect)
|
||||
if(m_ds->IsSolidRect())
|
||||
{
|
||||
if(m_threads == 1)
|
||||
{
|
||||
|
@ -904,7 +908,6 @@ GSRasterizerList::GSRasterizerList()
|
|||
: GSJobQueue<shared_ptr<GSRasterizerData> >()
|
||||
, m_sync_count(0)
|
||||
, m_syncpoint_count(0)
|
||||
, m_solidrect_count(0)
|
||||
{
|
||||
}
|
||||
|
||||
|
@ -955,11 +958,6 @@ int GSRasterizerList::GetPixels(bool reset)
|
|||
|
||||
void GSRasterizerList::Process(shared_ptr<GSRasterizerData>& item)
|
||||
{
|
||||
if(item->solidrect)
|
||||
{
|
||||
m_solidrect_count++;
|
||||
}
|
||||
|
||||
if(item->syncpoint)
|
||||
{
|
||||
for(size_t i = 0; i < m_workers.size(); i++)
|
||||
|
|
|
@ -39,7 +39,6 @@ public:
|
|||
int vertex_count;
|
||||
uint32* index;
|
||||
int index_count;
|
||||
bool solidrect;
|
||||
bool syncpoint;
|
||||
uint64 frame;
|
||||
|
||||
|
@ -52,7 +51,6 @@ public:
|
|||
, vertex_count(0)
|
||||
, index(NULL)
|
||||
, index_count(0)
|
||||
, solidrect(false)
|
||||
, syncpoint(false)
|
||||
, frame(0)
|
||||
{
|
||||
|
@ -101,6 +99,7 @@ public:
|
|||
#endif
|
||||
|
||||
__forceinline bool HasEdge() const {return m_de != NULL;}
|
||||
__forceinline bool IsSolidRect() const {return m_dr != NULL;}
|
||||
};
|
||||
|
||||
class IRasterizer : public GSAlignedClass<32>
|
||||
|
@ -133,7 +132,7 @@ protected:
|
|||
void DrawPoint(const GSVertexSW* vertex, int vertex_count, const uint32* index, int index_count);
|
||||
void DrawLine(const GSVertexSW* vertex, const uint32* index);
|
||||
void DrawTriangle(const GSVertexSW* vertex, const uint32* index);
|
||||
void DrawSprite(const GSVertexSW* vertex, const uint32* index, bool solidrect);
|
||||
void DrawSprite(const GSVertexSW* vertex, const uint32* index);
|
||||
|
||||
__forceinline void DrawTriangleSection(int top, int bottom, GSVertexSW& edge, const GSVertexSW& dedge, const GSVertexSW& dscan, const GSVector4& p0);
|
||||
|
||||
|
@ -214,7 +213,6 @@ public:
|
|||
|
||||
int m_sync_count;
|
||||
int m_syncpoint_count;
|
||||
int m_solidrect_count;
|
||||
|
||||
// IRasterizer
|
||||
|
||||
|
|
|
@ -0,0 +1,426 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#include "stdafx.h"
|
||||
#include "GSRendererCS.h"
|
||||
|
||||
// Constructs the compute-shader renderer: hands a GSVertexTraceCS and the
// GSVertex size to the base renderer, marks every video-memory page as not
// valid, switches on m_nativeres and installs the ConvertVertex table through
// the InitConvertVertex macro.
GSRendererCS::GSRendererCS()
	: GSRenderer(new GSVertexTraceCS(this), sizeof(GSVertex))
{
	// no page has been uploaded yet

	memset(m_vm_valid, 0, sizeof(m_vm_valid));

	m_nativeres = true;

	InitConvertVertex(GSRendererCS);
}
|
||||
|
||||
// Nothing is released explicitly here; the destructor body is intentionally empty.
GSRendererCS::~GSRendererCS()
{
}
|
||||
|
||||
// Creates the GPU resources used by the compute-shader renderer.
//
// After the base class has accepted the device, the following are created:
//   m_vm / m_vm_uav - raw buffer of VM_SIZE bytes plus its unordered access view
//   m_vb            - dynamic structured buffer with room for 10000 GSVertex
//   m_ib            - dynamic buffer with room for 30000 32-bit indices
//   m_pb            - one page (PAGE_SIZE) staging buffer for cpu<->gpu copies
//
// Returns false when the base class fails, when the device reports a feature
// level below D3D_FEATURE_LEVEL_10_0, or when any of the creations fails.
bool GSRendererCS::CreateDevice(GSDevice* dev_unk)
{
	if(!__super::CreateDevice(dev_unk))
	{
		return false;
	}

	D3D_FEATURE_LEVEL level;

	((GSDeviceDX*)dev_unk)->GetFeatureLevel(level);

	if(level < D3D_FEATURE_LEVEL_10_0)
	{
		return false;
	}

	GSDevice11* dev = (GSDevice11*)dev_unk;

	HRESULT hr;

	D3D11_BUFFER_DESC bd;
	D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;

	// video memory (VM_SIZE bytes, i.e. 4MB), viewed as raw 32-bit words

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = VM_SIZE;
	bd.StructureByteStride = 4;
	bd.Usage = D3D11_USAGE_DEFAULT;
	bd.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_vm);

	if(FAILED(hr)) return false;

	memset(&uavd, 0, sizeof(uavd));

	uavd.Format = DXGI_FORMAT_R32_TYPELESS;
	uavd.Buffer.FirstElement = 0;
	uavd.Buffer.NumElements = VM_SIZE / 4; // one element per 32-bit word
	uavd.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW;
	uavd.ViewDimension = D3D11_UAV_DIMENSION_BUFFER;

	hr = (*dev)->CreateUnorderedAccessView(m_vm, &uavd, &m_vm_uav);

	if(FAILED(hr)) return false;

	// vertex buffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(GSVertex) * 10000;
	bd.StructureByteStride = sizeof(GSVertex);
	bd.Usage = D3D11_USAGE_DYNAMIC;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
	bd.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_STRUCTURED;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_vb);

	if(FAILED(hr)) return false;

	// index buffer

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = sizeof(uint32) * 10000 * 3;
	bd.Usage = D3D11_USAGE_DYNAMIC;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_WRITE;
	bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_ib);

	if(FAILED(hr)) return false;

	// one page, for copying between cpu<->gpu

	memset(&bd, 0, sizeof(bd));

	bd.ByteWidth = PAGE_SIZE;
	bd.Usage = D3D11_USAGE_STAGING;
	bd.CPUAccessFlags = D3D11_CPU_ACCESS_READ | D3D11_CPU_ACCESS_WRITE;

	hr = (*dev)->CreateBuffer(&bd, NULL, &m_pb);

	if(FAILED(hr)) return false;

	return true;
}
|
||||
|
||||
// Placeholder: no output texture is produced yet, so NULL is returned for
// every value of i.
GSTexture* GSRendererCS::GetOutput(int i)
{
	// TODO: create a compute shader which unswizzles the frame from m_vm to the output texture

	return NULL;
}
|
||||
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void GSRendererCS::ConvertVertex(size_t dst_index, size_t src_index)
|
||||
{
|
||||
// TODO: vertex format more fitting as the input for the compute shader
|
||||
|
||||
if(src_index != dst_index)
|
||||
{
|
||||
GSVertex v = ((GSVertex*)m_vertex.buff)[src_index];
|
||||
|
||||
((GSVertex*)m_vertex.buff)[dst_index] = v;
|
||||
}
|
||||
}
|
||||
|
||||
void GSRendererCS::Draw()
|
||||
{
|
||||
HRESULT hr;
|
||||
|
||||
GSDevice11* dev = (GSDevice11*)m_dev;
|
||||
|
||||
ID3D11DeviceContext* ctx = *dev;
|
||||
|
||||
D3D11_BUFFER_DESC bd;
|
||||
D3D11_UNORDERED_ACCESS_VIEW_DESC uavd;
|
||||
D3D11_SHADER_RESOURCE_VIEW_DESC srvd;
|
||||
D3D11_MAPPED_SUBRESOURCE map;
|
||||
|
||||
CComPtr<ID3D11ShaderResourceView> vb_srv;
|
||||
CComPtr<ID3D11ShaderResourceView> ib_srv;
|
||||
|
||||
// TODO: cache these in hash_maps
|
||||
|
||||
CComPtr<ID3D11Buffer> fbr, fbc, zbr, zbc;
|
||||
CComPtr<ID3D11ShaderResourceView> fbr_srv, fbc_srv, zbr_srv, zbc_srv;
|
||||
|
||||
// TODO: grow m_vb, m_ib if needed
|
||||
|
||||
if(m_vertex.next > 10000) return;
|
||||
if(m_index.tail > 30000) return;
|
||||
|
||||
// TODO: fill/advance/discardwhenfull, as in GSDevice11::IASetVertexBuffer/IASetIndexBuffer
|
||||
|
||||
hr = ctx->Map(m_vb, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
|
||||
memcpy(map.pData, m_vertex.buff, sizeof(GSVertex) * m_vertex.next);
|
||||
|
||||
ctx->Unmap(m_vb, 0);
|
||||
|
||||
//
|
||||
|
||||
hr = ctx->Map(m_ib, 0, D3D11_MAP_WRITE_DISCARD, 0, &map); // discarding, until properly advancing the start pointer around
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
|
||||
memcpy(map.pData, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
ctx->Unmap(m_ib, 0);
|
||||
|
||||
// TODO: UpdateResource might be faster, based on my exprience with the real vertex buffer, write-no-overwrite/discarded dynamic buffer + map is better
|
||||
|
||||
//
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_UNKNOWN;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvd.Buffer.FirstElement = 0;
|
||||
srvd.Buffer.NumElements = m_vertex.next;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_vb, &srvd, &vb_srv); // TODO: have to create this dyncamically in Draw() or pass the start/count in a const reg
|
||||
|
||||
memset(&srvd, 0, sizeof(srvd));
|
||||
|
||||
srvd.Format = DXGI_FORMAT_R32_UINT;
|
||||
srvd.ViewDimension = D3D11_SRV_DIMENSION_BUFFER;
|
||||
srvd.Buffer.FirstElement = 0;
|
||||
srvd.Buffer.NumElements = m_index.tail;
|
||||
|
||||
hr = (*dev)->CreateShaderResourceView(m_ib, &srvd, &ib_srv); // TODO: have to create this dyncamically in Draw() or pass the start/count in a const reg
|
||||
|
||||
// fzb offsets
|
||||
|
||||
memset(&bd, 0, sizeof(bd));
|
||||
|
||||
bd.ByteWidth = sizeof(int) * 4096;
|
||||
bd.StructureByteStride = sizeof(int);
|
||||
bd.Usage = D3D11_USAGE_IMMUTABLE;
|
||||
bd.BindFlags = D3D11_BIND_SHADER_RESOURCE;
|
||||
|
||||
D3D11_SUBRESOURCE_DATA data;
|
||||
|
||||
memset(&data, 0, sizeof(data));
|
||||
|
||||
data.pSysMem = m_context->offset.fb->pixel.row;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &fbr);
|
||||
|
||||
data.pSysMem = m_context->offset.fb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &fbc);
|
||||
|
||||
data.pSysMem = m_context->offset.zb->pixel.row;
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &zbr);
|
||||
|
||||
data.pSysMem = m_context->offset.zb->pixel.col[0]; // same column layout for every line in case of frame and zbuffer formats
|
||||
|
||||
hr = (*dev)->CreateBuffer(&bd, &data, &zbc);
|
||||
|
||||
// TODO: D3D10_SHADER_MACRO (primclass, less frequently changing drawing attribs, etc.)
|
||||
|
||||
uint32 sel = 0; // TODO
|
||||
|
||||
hash_map<uint32, CComPtr<ID3D11ComputeShader> >::iterator i = m_cs.find(sel);
|
||||
|
||||
CComPtr<ID3D11ComputeShader> cs;
|
||||
|
||||
if(i == m_cs.end())
|
||||
{
|
||||
// hr = dev->CompileShader(IDR_CS_FX, "cs_main", NULL, &cs);
|
||||
hr = dev->CompileShader("E:\\Progs\\pcsx2\\plugins\\GSdx\\res\\cs.fx", "cs_main", NULL, &cs);
|
||||
|
||||
if(FAILED(hr)) return;
|
||||
|
||||
m_cs[sel] = cs;
|
||||
}
|
||||
else
|
||||
{
|
||||
cs = i->second;
|
||||
}
|
||||
|
||||
//
|
||||
|
||||
dev->CSSetShaderUAV(0, m_vm_uav);
|
||||
|
||||
dev->CSSetShaderSRV(0, vb_srv);
|
||||
dev->CSSetShaderSRV(1, ib_srv);
|
||||
dev->CSSetShaderSRV(2, fbr_srv);
|
||||
dev->CSSetShaderSRV(3, fbc_srv);
|
||||
dev->CSSetShaderSRV(4, zbr_srv);
|
||||
dev->CSSetShaderSRV(5, zbc_srv);
|
||||
|
||||
dev->CSSetShader(cs);
|
||||
|
||||
GSVector4i bbox = GSVector4i(0, 0, 640, 512); // TODO: vertex trace
|
||||
|
||||
GSVector4i r = bbox.ralign<Align_Outside>(GSVector2i(16, 8));
|
||||
|
||||
bool fb = true; // TODO: frame buffer used
|
||||
bool zb = true; // TODO: z-buffer used
|
||||
|
||||
if(fb) Write(m_context->offset.fb, r);
|
||||
if(zb) Write(m_context->offset.zb, r);
|
||||
|
||||
// TODO: constant buffer (frequently chaning drawing attribs)
|
||||
// TODO: texture (implement texture cache)
|
||||
// TODO: clut to a palette texture (should be texture1d, not simply buffer, it is random accessed)
|
||||
// TODO: CSSetShaderSRV(6 7 8 ..., texture level 0 1 2 ...) or use Texture3D?
|
||||
// TODO: invalidate texture cache
|
||||
|
||||
/*
|
||||
CComPtr<ID3D11Query> q;
|
||||
|
||||
D3D11_QUERY_DESC qd;
|
||||
memset(&qd, 0, sizeof(qd));
|
||||
qd.Query = D3D11_QUERY_EVENT;
|
||||
|
||||
hr = (*dev)->CreateQuery(&qd, &q);
|
||||
|
||||
ctx->Begin(q);
|
||||
*/
|
||||
|
||||
printf("[%lld] dispatch %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
|
||||
__rdtsc(),
|
||||
m_context->FRAME.Block(), m_context->FRAME.PSM,
|
||||
m_context->ZBUF.Block(), m_context->ZBUF.PSM,
|
||||
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
|
||||
PRIM->PRIM, m_vertex.next, m_index.tail);
|
||||
|
||||
GSVector4i rsize = r.rsize();
|
||||
|
||||
dev->Dispatch(rsize.z >> 4, rsize.w >> 3, 1); // TODO: pass upper-left corner offset (r.xy) in a const buffer
|
||||
|
||||
/*
|
||||
ctx->End(q);
|
||||
|
||||
uint64 t0 = __rdtsc();
|
||||
|
||||
BOOL b;
|
||||
|
||||
while(S_OK != ctx->GetData(q, &b, sizeof(BOOL), 0)) {}
|
||||
|
||||
printf("%lld\n", __rdtsc() - t0);
|
||||
*/
|
||||
}
|
||||
|
||||
void GSRendererCS::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r)
|
||||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.DBP, BITBLTBUF.DBW, BITBLTBUF.DPSM);
|
||||
|
||||
Read(o, r, true); // TODO: fully overwritten pages are not needed to be read, only invalidated
|
||||
|
||||
// TODO: false deps, 8H/4HL/4HH texture sharing pages with 24-bit target
|
||||
// TODO: invalidate texture cache
|
||||
}
|
||||
|
||||
void GSRendererCS::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
{
|
||||
GSOffset* o = m_mem.GetOffset(BITBLTBUF.SBP, BITBLTBUF.SBW, BITBLTBUF.SPSM);
|
||||
|
||||
Read(o, r, false);
|
||||
}
|
||||
|
||||
// Uploads to m_vm every page covered by rectangle r of offset o that is not yet
// marked valid in m_vm_valid, and marks it valid.
//
// m_vm_valid holds one bit per page: word index is page / 32, bit index is
// page % 32. Each uploaded page is logged with printf.
void GSRendererCS::Write(GSOffset* o, const GSVector4i& r)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	// A box whose bottom <= top or back <= front is empty and turns the update
	// into a no-op; a byte range of a buffer is described with a height and a
	// depth of one.

	box.bottom = 1;
	box.back = 1;

	uint32* pages = o->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		uint32 page = pages[i];

		uint32 row = page >> 5;
		uint32 col = 1u << (page & 31); // unsigned, bit 31 must not go through a signed shift

		if((m_vm_valid[row] & col) != 0)
		{
			continue; // already uploaded
		}

		m_vm_valid[row] |= col;

		box.left = page * PAGE_SIZE;
		box.right = box.left + PAGE_SIZE;

		ctx->UpdateSubresource(m_vm, 0, &box, m_mem.m_vm8 + box.left, 0, 0);

		printf("[%lld] write %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
	}

	delete [] pages;
}
|
||||
|
||||
// Copies back into m_mem.m_vm8 every page covered by rectangle r (in the layout
// described by o) that is currently flagged in m_vm_valid.
//
// o          - offset/layout object used to enumerate the pages touched by r
// r          - rectangle whose pages are to be read back
// invalidate - when true, the page's flag in m_vm_valid is cleared before the
//              read-back is attempted (it stays cleared even if Map fails)
//
// Each page goes m_vm -> m_pb (GPU copy) -> Map -> memcpy.
// m_pb is presumably a CPU-readable staging buffer of at least PAGE_SIZE bytes,
// confirm against its creation code.
void GSRendererCS::Read(GSOffset* o, const GSVector4i& r, bool invalidate)
{
	GSDevice11* dev = (GSDevice11*)m_dev;

	ID3D11DeviceContext* ctx = *dev;

	// m_vm is an ID3D11Buffer, so left/right are byte offsets. The remaining two
	// axes must still describe a non-empty range (top < bottom, front < back),
	// otherwise Direct3D treats the box as empty and the copy is a no-op.

	D3D11_BOX box;

	memset(&box, 0, sizeof(box));

	box.bottom = 1;
	box.back = 1;

	// EOP-terminated list, released with delete [] below

	uint32* pages = o->GetPages(r);

	for(size_t i = 0; pages[i] != GSOffset::EOP; i++)
	{
		const uint32 page = pages[i];

		const uint32 row = page >> 5;
		const uint32 col = 1u << (page & 31); // unsigned literal, bit 31 must not go through a signed shift

		if((m_vm_valid[row] & col) == 0)
		{
			continue; // nothing to read back for this page
		}

		if(invalidate)
		{
			m_vm_valid[row] &= ~col; // the bit is known to be set here
		}

		box.left = page * PAGE_SIZE;
		box.right = box.left + PAGE_SIZE;

		ctx->CopySubresourceRegion(m_pb, 0, 0, 0, 0, m_vm, 0, &box);

		D3D11_MAPPED_SUBRESOURCE map;

		// the mapping is only read from, so read access is all that is needed

		if(SUCCEEDED(ctx->Map(m_pb, 0, D3D11_MAP_READ, 0, &map)))
		{
			memcpy(m_mem.m_vm8 + box.left, map.pData, PAGE_SIZE);

			ctx->Unmap(m_pb, 0);

			// NOTE(review): unconditional trace output on every page read back, test code

			printf("[%lld] read %05x %d %d (%d)\n", __rdtsc(), o->bp, o->bw, o->psm, page);
		}
	}

	delete [] pages;
}
|
|
@ -0,0 +1,59 @@
|
|||
/*
|
||||
* Copyright (C) 2007-2009 Gabest
|
||||
* http://www.gabest.org
|
||||
*
|
||||
* This Program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2, or (at your option)
|
||||
* any later version.
|
||||
*
|
||||
* This Program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with GNU Make; see the file COPYING. If not, write to
|
||||
* the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
* http://www.gnu.org/copyleft/gpl.html
|
||||
*
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "GSRenderer.h"
|
||||
#include "GSDevice11.h"
|
||||
|
||||
class GSRendererCS : public GSRenderer
|
||||
{
|
||||
class GSVertexTraceCS : public GSVertexTrace
|
||||
{
|
||||
public:
|
||||
GSVertexTraceCS(const GSState* state) : GSVertexTrace(state) {}
|
||||
};
|
||||
|
||||
CComPtr<ID3D11Buffer> m_vm;
|
||||
CComPtr<ID3D11UnorderedAccessView> m_vm_uav;
|
||||
CComPtr<ID3D11Buffer> m_vb;
|
||||
CComPtr<ID3D11Buffer> m_ib;
|
||||
CComPtr<ID3D11Buffer> m_pb;
|
||||
hash_map<uint32, CComPtr<ID3D11ComputeShader> > m_cs;
|
||||
uint32 m_vm_valid[16];
|
||||
|
||||
void Write(GSOffset* o, const GSVector4i& r);
|
||||
void Read(GSOffset* o, const GSVector4i& r, bool invalidate);
|
||||
|
||||
protected:
|
||||
template<uint32 prim, uint32 tme, uint32 fst>
|
||||
void ConvertVertex(size_t dst_index, size_t src_index);
|
||||
|
||||
bool CreateDevice(GSDevice* dev);
|
||||
GSTexture* GetOutput(int i);
|
||||
void Draw();
|
||||
void InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r);
|
||||
void InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut);
|
||||
|
||||
public:
|
||||
GSRendererCS();
|
||||
virtual ~GSRendererCS();
|
||||
};
|
|
@ -89,8 +89,7 @@ void GSRendererSW::VSync(int field)
|
|||
//
|
||||
printf("m_sync_count = %d\n", ((GSRasterizerList*)m_rl)->m_sync_count); ((GSRasterizerList*)m_rl)->m_sync_count = 0;
|
||||
printf("m_syncpoint_count = %d\n", ((GSRasterizerList*)m_rl)->m_syncpoint_count); ((GSRasterizerList*)m_rl)->m_syncpoint_count = 0;
|
||||
printf("m_solidrect_count = %d\n", ((GSRasterizerList*)m_rl)->m_solidrect_count); ((GSRasterizerList*)m_rl)->m_solidrect_count = 0;
|
||||
*/
|
||||
*/
|
||||
GSRenderer::VSync(field);
|
||||
|
||||
m_tc->IncAge();
|
||||
|
@ -198,29 +197,38 @@ void GSRendererSW::ConvertVertex(size_t dst_index, size_t src_index)
|
|||
}
|
||||
}
|
||||
|
||||
#define LOG 0
|
||||
|
||||
FILE* s_fp = LOG ? fopen("c:\\temp1\\_.txt", "w") : NULL;
|
||||
|
||||
void GSRendererSW::Draw()
|
||||
{
|
||||
SharedData* sd = new SharedData(this);
|
||||
|
||||
shared_ptr<GSRasterizerData> data(sd);
|
||||
|
||||
if(!GetScanlineGlobalData(sd)) return;
|
||||
sd->primclass = m_vt->m_primclass;
|
||||
sd->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
|
||||
sd->vertex = (GSVertexSW*)sd->buff;
|
||||
sd->vertex_count = m_vertex.next;
|
||||
sd->index = (uint32*)(sd->buff + sizeof(GSVertexSW) * m_vertex.next);
|
||||
sd->index_count = m_index.tail;
|
||||
|
||||
data->primclass = m_vt->m_primclass;
|
||||
data->buff = (uint8*)_aligned_malloc(sizeof(GSVertexSW) * m_vertex.next + sizeof(uint32) * m_index.tail, 32);
|
||||
data->vertex = (GSVertexSW*)data->buff;
|
||||
data->vertex_count = m_vertex.next;
|
||||
data->index = (uint32*)(data->buff + sizeof(GSVertexSW) * m_vertex.next);
|
||||
data->index_count = m_index.tail;
|
||||
|
||||
memcpy(data->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
|
||||
memcpy(data->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
memcpy(sd->vertex, m_vertex.buff, sizeof(GSVertexSW) * m_vertex.next);
|
||||
memcpy(sd->index, m_index.buff, sizeof(uint32) * m_index.tail);
|
||||
|
||||
for(size_t i = 0; i < m_index.tail; i++)
|
||||
{
|
||||
ASSERT(((GSVertexSW*)m_vertex.buff + m_index.buff[i])->_pad.u32[0] == 0x12345678);
|
||||
}
|
||||
|
||||
// TODO: delay texture update, do it later along with the syncing on the dispatcher thread, then this thread does not have to wait and can continue assembling more jobs
|
||||
// TODO: if(any texture page is used as a target) GSRasterizerData::syncpoint = true;
|
||||
// TODO: virtual void GSRasterizerData::Update() {texture[all levels]->Update();}, call it from the dispatcher thread before sending to workers
|
||||
// TODO: m_tc->InvalidatePages must be called after texture->Update, move that inside GSRasterizerData::Update too
|
||||
|
||||
if(!GetScanlineGlobalData(sd)) return;
|
||||
|
||||
//
|
||||
|
||||
const GSDrawingContext* context = m_context;
|
||||
|
@ -232,10 +240,9 @@ void GSRendererSW::Draw()
|
|||
|
||||
scissor.z = std::min<int>(scissor.z, (int)context->FRAME.FBW * 64); // TODO: find a game that overflows and check which one is the right behaviour
|
||||
|
||||
data->scissor = scissor;
|
||||
data->bbox = bbox;
|
||||
data->solidrect = gd.sel.IsSolidRect();
|
||||
data->frame = m_perfmon.GetFrame();
|
||||
sd->scissor = scissor;
|
||||
sd->bbox = bbox;
|
||||
sd->frame = m_perfmon.GetFrame();
|
||||
|
||||
//
|
||||
|
||||
|
@ -262,41 +269,75 @@ void GSRendererSW::Draw()
|
|||
|
||||
if(m_fzb != context->offset.fzb)
|
||||
{
|
||||
m_fzb = context->offset.fzb;
|
||||
// hmm, what if "r" gets bigger next time and slips through unchecked, need to trace that too
|
||||
|
||||
data->syncpoint = true;
|
||||
}
|
||||
sd->syncpoint = true; // TODO
|
||||
|
||||
// - chross-check frame and z-buffer pages, they cannot overlap with eachother and with previous batches in queue
|
||||
// - m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
|
||||
|
||||
if(!data->syncpoint)
|
||||
{
|
||||
if(gd.sel.fwrite)
|
||||
if(!sd->syncpoint)
|
||||
{
|
||||
for(const uint32* p = fb_pages; *p != GSOffset::EOP; p++)
|
||||
if(fb_pages == NULL)
|
||||
{
|
||||
if(m_fzb_pages[*p] & 0xffff0000) // already used as a z-buffer
|
||||
fb_pages = context->offset.fb->GetPages(r);
|
||||
}
|
||||
|
||||
if(CheckTargetPages<0xffffffff>(fb_pages))
|
||||
{
|
||||
sd->syncpoint = true;
|
||||
|
||||
if(LOG) fprintf(s_fp, "syncpoint 0\n");
|
||||
}
|
||||
}
|
||||
|
||||
if(!sd->syncpoint)
|
||||
{
|
||||
if(zb_pages == NULL)
|
||||
{
|
||||
zb_pages = context->offset.zb->GetPages(r);
|
||||
}
|
||||
|
||||
if(CheckTargetPages<0xffffffff>(zb_pages))
|
||||
{
|
||||
sd->syncpoint = true;
|
||||
|
||||
if(LOG) fprintf(s_fp, "syncpoint 1\n");
|
||||
}
|
||||
}
|
||||
|
||||
if(!sd->syncpoint)
|
||||
{
|
||||
if(LOG) fprintf(s_fp, "no syncpoint *\n");
|
||||
}
|
||||
|
||||
m_fzb = context->offset.fzb;
|
||||
}
|
||||
else
|
||||
{
|
||||
// chross-check frame and z-buffer pages, they cannot overlap with eachother and with previous batches in queue,
|
||||
// m_fzb filters out most of these cases, only have to be careful when the addresses stay the same and the output
|
||||
// is mutually enabled/disabled and alternating (Bully FBP/ZBP = 0x2300)
|
||||
|
||||
if(!sd->syncpoint)
|
||||
{
|
||||
if(gd.sel.fwrite)
|
||||
{
|
||||
if(CheckTargetPages<0xffff0000>(fb_pages)) // already used as a z-buffer
|
||||
{
|
||||
data->syncpoint = true;
|
||||
|
||||
break;
|
||||
sd->syncpoint = true;
|
||||
|
||||
if(LOG) fprintf(s_fp, "syncpoint 2\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if(!data->syncpoint)
|
||||
{
|
||||
if(gd.sel.zwrite)
|
||||
if(!sd->syncpoint)
|
||||
{
|
||||
for(const uint32* p = zb_pages; *p != GSOffset::EOP; p++)
|
||||
if(gd.sel.zwrite)
|
||||
{
|
||||
if(m_fzb_pages[*p] & 0x0000ffff) // already used as a frame buffer
|
||||
if(CheckTargetPages<0x0000ffff>(zb_pages)) // already used as a frame buffer
|
||||
{
|
||||
data->syncpoint = true;
|
||||
sd->syncpoint = true;
|
||||
|
||||
break;
|
||||
if(LOG) fprintf(s_fp, "syncpoint 3\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -363,6 +404,12 @@ void GSRendererSW::Draw()
|
|||
}
|
||||
else
|
||||
{
|
||||
if(LOG) fprintf(s_fp, "queue %05x %d %05x %d %05x %d %dx%d | %d %d %d\n",
|
||||
m_context->FRAME.Block(), m_context->FRAME.PSM,
|
||||
m_context->ZBUF.Block(), m_context->ZBUF.PSM,
|
||||
PRIM->TME ? m_context->TEX0.TBP0 : 0xfffff, m_context->TEX0.PSM, (int)m_context->TEX0.TW, (int)m_context->TEX0.TH,
|
||||
PRIM->PRIM, sd->vertex_count, sd->index_count);
|
||||
|
||||
m_rl->Queue(data);
|
||||
}
|
||||
|
||||
|
@ -384,8 +431,16 @@ void GSRendererSW::Sync(int reason)
|
|||
|
||||
GSPerfMonAutoTimer pmat(&m_perfmon, GSPerfMon::Sync);
|
||||
|
||||
uint64 t = __rdtsc();
|
||||
|
||||
m_rl->Sync();
|
||||
|
||||
s_n++;
|
||||
|
||||
t = __rdtsc() - t;
|
||||
|
||||
if(LOG) fprintf(s_fp, "sync n=%d r=%d t=%lld p=%d %c\n", s_n, reason, t, m_rl->GetPixels(), t > 10000000 ? '*' : ' ');
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Fillrate, m_rl->GetPixels());
|
||||
}
|
||||
|
||||
|
@ -397,8 +452,6 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
|
||||
o->GetPages(r, p);
|
||||
|
||||
m_tc->InvalidatePages(p, o->psm);
|
||||
|
||||
// check if the changing pages either used as a texture or a target
|
||||
|
||||
for(; *p != GSOffset::EOP; p++)
|
||||
|
@ -414,6 +467,8 @@ void GSRendererSW::InvalidateVideoMem(const GIFRegBITBLTBUF& BITBLTBUF, const GS
|
|||
break;
|
||||
}
|
||||
}
|
||||
|
||||
m_tc->InvalidatePages(m_tmp_pages, o->psm); // if texture update runs on a thread and Sync(5) happens then this must come later
|
||||
}
|
||||
|
||||
void GSRendererSW::InvalidateLocalMem(const GIFRegBITBLTBUF& BITBLTBUF, const GSVector4i& r, bool clut)
|
||||
|
@ -493,6 +548,19 @@ void GSRendererSW::ReleasePages(const uint32* pages, int type)
|
|||
}
|
||||
}
|
||||
|
||||
template<uint32 mask> bool GSRendererSW::CheckTargetPages(const uint32* pages)
|
||||
{
|
||||
for(const uint32* p = pages; *p != GSOffset::EOP; p++)
|
||||
{
|
||||
if(mask != 0xffffffff ? (m_fzb_pages[*p] & mask) : m_fzb_pages[*p])
|
||||
{
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
#include "GSTextureSW.h"
|
||||
|
||||
bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
||||
|
@ -811,19 +879,19 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
{
|
||||
// skip per pixel division if q is constant
|
||||
|
||||
GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
|
||||
GSVertexSW* RESTRICT v = data->vertex;
|
||||
|
||||
if(m_vt->m_eq.q)
|
||||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
const GSVector4& t = v[m_index.buff[0]].t;
|
||||
const GSVector4& t = v[data->index[0]].t;
|
||||
|
||||
if(t.z != 1.0f)
|
||||
{
|
||||
GSVector4 w = t.zzzz().rcpnr();
|
||||
|
||||
for(int i = 0, j = m_vertex.next; i < j; i++)
|
||||
for(int i = 0, j = data->vertex_count; i < j; i++)
|
||||
{
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
|
@ -835,7 +903,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
{
|
||||
gd.sel.fst = 1;
|
||||
|
||||
for(int i = 0, j = m_vertex.next; i < j; i += 2)
|
||||
for(int i = 0, j = data->vertex_count; i < j; i += 2)
|
||||
{
|
||||
GSVector4 t0 = v[i + 0].t;
|
||||
GSVector4 t1 = v[i + 1].t;
|
||||
|
@ -856,9 +924,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
|
|||
|
||||
GSVector4 half(0x8000, 0x8000);
|
||||
|
||||
GSVertexSW* RESTRICT v = (GSVertexSW*)m_vertex.buff;
|
||||
GSVertexSW* RESTRICT v = data->vertex;
|
||||
|
||||
for(int i = 0, j = m_vertex.next; i < j; i++)
|
||||
for(int i = 0, j = data->vertex_count; i < j; i++)
|
||||
{
|
||||
GSVector4 t = v[i].t;
|
||||
|
||||
|
@ -1051,12 +1119,12 @@ GSRendererSW::SharedData::~SharedData()
|
|||
|
||||
delete m_fb_pages;
|
||||
delete m_zb_pages;
|
||||
|
||||
|
||||
for(size_t i = 0; i < countof(m_tex_pages) && m_tex_pages[i] != NULL; i++)
|
||||
{
|
||||
m_parent->ReleasePages(m_tex_pages[i], 2);
|
||||
}
|
||||
|
||||
|
||||
if(global.clut) _aligned_free(global.clut);
|
||||
if(global.dimx) _aligned_free(global.dimx);
|
||||
}
|
||||
|
|
|
@ -66,6 +66,7 @@ protected:
|
|||
|
||||
void UsePages(const uint32* pages, int type);
|
||||
void ReleasePages(const uint32* pages, int type);
|
||||
template<uint32 mask> bool CheckTargetPages(const uint32* pages);
|
||||
|
||||
bool GetScanlineGlobalData(SharedData* data);
|
||||
|
||||
|
|
|
@ -671,11 +671,6 @@ template<int i> void GSState::ApplyTEX0(GIFRegTEX0& TEX0)
|
|||
|
||||
TEX0.CPSM &= 0xa; // 1010b
|
||||
|
||||
if((TEX0.TBW & 1) && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4))
|
||||
{
|
||||
TEX0.TBW &= ~1; // GS User 2.6
|
||||
}
|
||||
|
||||
if((TEX0.u32[0] ^ m_env.CTXT[i].TEX0.u32[0]) & 0x3ffffff) // TBP0 TBW PSM
|
||||
{
|
||||
m_env.CTXT[i].offset.tex = m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
@ -709,6 +704,13 @@ template<int i> void GSState::GIFRegHandlerTEX0(const GIFReg* RESTRICT r)
|
|||
if(TEX0.TW > 10) TEX0.TW = 10;
|
||||
if(TEX0.TH > 10) TEX0.TH = 10;
|
||||
|
||||
if((TEX0.TBW & 1) && (TEX0.PSM == PSM_PSMT8 || TEX0.PSM == PSM_PSMT4))
|
||||
{
|
||||
ASSERT(TEX0.TBW == 1); // TODO
|
||||
|
||||
TEX0.TBW &= ~1; // GS User 2.6
|
||||
}
|
||||
|
||||
ApplyTEX0<i>(TEX0);
|
||||
|
||||
if(m_env.CTXT[i].TEX1.MTBA)
|
||||
|
@ -1265,32 +1267,34 @@ void GSState::FlushPrim()
|
|||
size_t head = m_vertex.head;
|
||||
size_t tail = m_vertex.tail;
|
||||
size_t next = m_vertex.next;
|
||||
size_t unused = 0;
|
||||
|
||||
if(tail > head)
|
||||
{
|
||||
switch(PRIM->PRIM)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
ASSERT(0);
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
|
||||
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (head + 1)], stride);
|
||||
unused = tail - head;
|
||||
memcpy(buff, &m_vertex.buff[stride * head], stride * unused);
|
||||
break;
|
||||
case GS_TRIANGLEFAN:
|
||||
if(tail > head + 0) memcpy(&buff[stride * 0], &m_vertex.buff[stride * (head + 0)], stride);
|
||||
if(tail > head + 1) memcpy(&buff[stride * 1], &m_vertex.buff[stride * (tail - 1)], stride);
|
||||
memcpy(buff, &m_vertex.buff[stride * head], stride); unused = 1;
|
||||
if(tail - 1 > head) {memcpy(&buff[stride], &m_vertex.buff[stride * (tail - 1)], stride); unused = 2;}
|
||||
break;
|
||||
case GS_INVALID:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
|
||||
ASSERT(unused < GSUtil::GetVertexCount(PRIM->PRIM));
|
||||
}
|
||||
|
||||
if(GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt < 3 && GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt < 3)
|
||||
|
@ -1308,34 +1312,19 @@ void GSState::FlushPrim()
|
|||
m_index.tail = 0;
|
||||
|
||||
m_vertex.head = 0;
|
||||
m_vertex.tail = 0;
|
||||
m_vertex.next = 0;
|
||||
|
||||
if(tail > head)
|
||||
if(unused > 0)
|
||||
{
|
||||
switch(PRIM->PRIM)
|
||||
{
|
||||
case GS_POINTLIST:
|
||||
break;
|
||||
case GS_LINELIST:
|
||||
case GS_LINESTRIP:
|
||||
case GS_SPRITE:
|
||||
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
|
||||
break;
|
||||
case GS_TRIANGLELIST:
|
||||
case GS_TRIANGLESTRIP:
|
||||
case GS_TRIANGLEFAN:
|
||||
if(tail > head + 0) {memcpy(&m_vertex.buff[stride * 0], &buff[stride * 0], stride); m_vertex.tail++;}
|
||||
if(tail > head + 1) {memcpy(&m_vertex.buff[stride * 1], &buff[stride * 1], stride); m_vertex.tail++;}
|
||||
break;
|
||||
case GS_INVALID:
|
||||
break;
|
||||
default:
|
||||
__assume(0);
|
||||
}
|
||||
memcpy(m_vertex.buff, buff, stride * unused);
|
||||
|
||||
m_vertex.tail = unused;
|
||||
m_vertex.next = next > head ? next - head : 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
m_vertex.tail = 0;
|
||||
m_vertex.next = 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1380,6 +1369,15 @@ void GSState::Write(const uint8* mem, int len)
|
|||
m_tr.start = m_tr.end = m_tr.total;
|
||||
|
||||
m_perfmon.Put(GSPerfMon::Swizzle, len);
|
||||
|
||||
/*
|
||||
static int n = 0;
|
||||
string s;
|
||||
s = format("c:\\temp1\\[%04d]_%05x_%d_%d_%d_%d_%d_%d.bmp",
|
||||
n++, (int)m_env.BITBLTBUF.DBP, (int)m_env.BITBLTBUF.DBW, (int)m_env.BITBLTBUF.DPSM,
|
||||
r.left, r.top, r.right, r.bottom);
|
||||
m_mem.SaveBMP(s, m_env.BITBLTBUF.DBP, m_env.BITBLTBUF.DBW, m_env.BITBLTBUF.DPSM, r.right, r.bottom);
|
||||
*/
|
||||
}
|
||||
else
|
||||
{
|
||||
|
|
|
@ -95,7 +95,7 @@ void GSTextureCacheSW::InvalidatePages(const uint32* pages, uint32 psm)
|
|||
{
|
||||
Texture* t = *i;
|
||||
|
||||
if(GSUtil::HasSharedBits(psm, t->m_TEX0.PSM))
|
||||
if(GSUtil::HasSharedBits(psm, t->m_sharedbits))
|
||||
{
|
||||
uint32* RESTRICT valid = t->m_valid;
|
||||
|
||||
|
@ -181,6 +181,8 @@ GSTextureCacheSW::Texture::Texture(GSState* state, uint32 tw0, const GIFRegTEX0&
|
|||
memset(m_valid, 0, sizeof(m_valid));
|
||||
memset(m_pages.bm, 0, sizeof(m_pages.bm));
|
||||
|
||||
m_sharedbits = GSUtil::HasSharedBitsPtr(m_TEX0.PSM);
|
||||
|
||||
m_offset = m_state->m_mem.GetOffset(TEX0.TBP0, TEX0.TBW, TEX0.PSM);
|
||||
|
||||
m_pages.n = m_offset->GetPages(GSVector4i(0, 0, 1 << TEX0.TW, 1 << TEX0.TH));
|
||||
|
|
|
@ -41,6 +41,7 @@ public:
|
|||
vector<GSVector2i>* m_p2t;
|
||||
uint32 m_valid[MAX_PAGES];
|
||||
struct {uint32 bm[16]; const uint32* n;} m_pages;
|
||||
const uint32* RESTRICT m_sharedbits;
|
||||
|
||||
// m_valid
|
||||
// fast mode: each uint32 bits map to the 32 blocks of that page
|
||||
|
|
|
@ -275,19 +275,15 @@ protected:
|
|||
if(m_exit) {m_cv.lock.Unlock(); return;}
|
||||
}
|
||||
|
||||
{
|
||||
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
|
||||
T& item = m_queue.front();
|
||||
|
||||
T item = m_queue.front();
|
||||
m_cv.lock.Unlock();
|
||||
|
||||
m_cv.lock.Unlock();
|
||||
Process(item);
|
||||
|
||||
Process(item);
|
||||
m_cv.lock.Lock();
|
||||
|
||||
m_cv.lock.Lock();
|
||||
|
||||
m_queue.pop();
|
||||
}
|
||||
m_queue.pop();
|
||||
|
||||
if(m_queue.empty())
|
||||
{
|
||||
|
@ -312,23 +308,18 @@ protected:
|
|||
m_ev.lock.Lock();
|
||||
}
|
||||
|
||||
{
|
||||
// NOTE: this is scoped because we must make sure the last item is no longer around when Wait detects an empty queue
|
||||
T& item = m_queue.front();
|
||||
|
||||
T item = m_queue.front();
|
||||
m_ev.lock.Unlock();
|
||||
|
||||
m_ev.lock.Unlock();
|
||||
Process(item);
|
||||
|
||||
Process(item);
|
||||
m_ev.lock.Lock();
|
||||
|
||||
m_ev.lock.Lock();
|
||||
|
||||
m_queue.pop();
|
||||
}
|
||||
m_queue.pop();
|
||||
|
||||
_InterlockedDecrement(&m_ev.count);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -161,6 +161,16 @@ int GSUtil::GetVertexCount(uint32 prim)
|
|||
return s_maps.VertexCountField[prim];
|
||||
}
|
||||
|
||||
// Returns the row of the shared-bits table that belongs to dpsm, so repeated
// HasSharedBits(spsm, ptr) tests against the same format can skip the row lookup.
const uint32* GSUtil::HasSharedBitsPtr(uint32 dpsm)
{
	const uint32* row = s_maps.SharedBitsField[dpsm];

	return row;
}
|
||||
|
||||
// Tests spsm against a table row obtained from HasSharedBitsPtr.
// Returns true when the bit belonging to spsm is NOT set in the row
// (word = spsm >> 5, bit = spsm & 0x1f).
bool GSUtil::HasSharedBits(uint32 spsm, const uint32* RESTRICT ptr)
{
	const uint32 word = ptr[spsm >> 5];
	const uint32 bit = 1 << (spsm & 0x1f);

	return (word & bit) == 0;
}
|
||||
|
||||
bool GSUtil::HasSharedBits(uint32 spsm, uint32 dpsm)
|
||||
{
|
||||
return (s_maps.SharedBitsField[dpsm][spsm >> 5] & (1 << (spsm & 0x1f))) == 0;
|
||||
|
|
|
@ -31,6 +31,8 @@ public:
|
|||
static GS_PRIM_CLASS GetPrimClass(uint32 prim);
|
||||
static int GetVertexCount(uint32 prim);
|
||||
|
||||
static const uint32* HasSharedBitsPtr(uint32 dpsm);
|
||||
static bool HasSharedBits(uint32 spsm, const uint32* ptr);
|
||||
static bool HasSharedBits(uint32 spsm, uint32 dpsm);
|
||||
static bool HasSharedBits(uint32 sbp, uint32 spsm, uint32 dbp, uint32 dpsm);
|
||||
static bool HasCompatibleBits(uint32 spsm, uint32 dpsm);
|
||||
|
|
|
@ -57,6 +57,7 @@ IDR_TFX_FX RCDATA "res\\tfx.fx"
|
|||
IDR_MERGE_FX RCDATA "res\\merge.fx"
|
||||
IDR_INTERLACE_FX RCDATA "res\\interlace.fx"
|
||||
IDR_FXAA_FX RCDATA "res\\fxaa.fx"
|
||||
IDR_CS_FX RCDATA "res\\cs.fx"
|
||||
|
||||
/////////////////////////////////////////////////////////////////////////////
|
||||
//
|
||||
|
|
|
@ -531,6 +531,7 @@
|
|||
<AssemblerOutput Condition="'$(Configuration)|$(Platform)'=='Release AVX|x64'">AssemblyAndSourceCode</AssemblerOutput>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSRenderer.cpp" />
|
||||
<ClCompile Include="GSRendererCS.cpp" />
|
||||
<ClCompile Include="GSRendererDX.cpp" />
|
||||
<ClCompile Include="GSRendererDX11.cpp" />
|
||||
<ClCompile Include="GSRendererDX9.cpp" />
|
||||
|
@ -1658,6 +1659,7 @@
|
|||
<ClInclude Include="GSPerfMon.h" />
|
||||
<ClInclude Include="GSRasterizer.h" />
|
||||
<ClInclude Include="GSRenderer.h" />
|
||||
<ClInclude Include="GSRendererCS.h" />
|
||||
<ClInclude Include="GSRendererDX.h" />
|
||||
<ClInclude Include="GSRendererDX11.h" />
|
||||
<ClInclude Include="GSRendererDX9.h" />
|
||||
|
@ -1727,6 +1729,7 @@
|
|||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="GSdx.def" />
|
||||
<None Include="res\cs.fx" />
|
||||
<None Include="res\fxaa.fx" />
|
||||
<None Include="res\logo10.bmp" />
|
||||
<None Include="res\logo9.bmp" />
|
||||
|
|
|
@ -324,6 +324,9 @@
|
|||
<ClCompile Include="GSDrawScanlineCodeGenerator.x86.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
<ClCompile Include="GSRendererCS.cpp">
|
||||
<Filter>Source Files</Filter>
|
||||
</ClCompile>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="GS.h">
|
||||
|
@ -647,6 +650,9 @@
|
|||
<ClInclude Include="config.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
<ClInclude Include="GSRendererCS.h">
|
||||
<Filter>Header Files</Filter>
|
||||
</ClInclude>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<None Include="res\logo10.bmp">
|
||||
|
@ -677,6 +683,9 @@
|
|||
<None Include="res\fxaa.fx">
|
||||
<Filter>Shaders</Filter>
|
||||
</None>
|
||||
<None Include="res\cs.fx">
|
||||
<Filter>Shaders</Filter>
|
||||
</None>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ResourceCompile Include="GSdx.rc">
|
||||
|
|
|
@ -0,0 +1,73 @@
|
|||
// Element type of VertexBuffer, eight 32-bit members (32 bytes).
// NOTE(review): the member meanings below are inferred from their names, confirm
// against the CPU-side vertex structure that fills the buffer.
struct Vertex
{
	float2 st; // presumably texture coordinates
	uint c; // packed color, cs_main extracts four 8-bit channels from it
	float q; // presumably the perspective term
	uint xy; // presumably packed position
	uint z; // presumably depth
	uint uv; // presumably packed texel coordinates
	uint f; // presumably fog
};
|
||||
|
||||
RWByteAddressBuffer VideoMemory : register(u0);
|
||||
|
||||
StructuredBuffer<Vertex> VertexBuffer : register(t0);
|
||||
Buffer<uint> IndexBuffer : register(t1);
|
||||
|
||||
Buffer<int> FrameRowOffset : register(t2);
|
||||
Buffer<int> FrameColOffset : register(t3);
|
||||
Buffer<int> ZBufRowOffset : register(t4);
|
||||
Buffer<int> ZBufColOffset : register(t5);
|
||||
|
||||
cbuffer DrawingEnvironment : register(c0)
|
||||
{
|
||||
// TODO
|
||||
};
|
||||
|
||||
// one group is 16x8 pixels and one thread does 2 pixels, otherwise could not read-merge-write 16-bit targets safely
|
||||
// neighbouring pixels are next to each other in memory, at least we don't have to calculate the address twice
|
||||
|
||||
// TODO: they say groupshared memory is faster, try unswizzling the corresponding chunk of memory initially (how to do that once by only one thread?) then write-back when finished, unless it was untouched
|
||||
|
||||
// Test entry point: walks the index buffer three indices at a time and writes the
// color of the first vertex to the frame buffer address of the thread's pixel.
//
// gid - group id, one group covers a 16x8 pixel tile (the host dispatches
//       width >> 4 by height >> 3 groups)
// tid - thread id inside the group, each of the 8x8 threads owns two horizontally
//       adjacent pixels
[numthreads(8, 8, 1)]
void cs_main(uint3 gid : SV_GroupID, uint3 tid : SV_GroupThreadID)
{
	uint count;

	IndexBuffer.GetDimensions(count);

	// The group id has to be scaled by the tile size, otherwise the tiles of
	// neighbouring groups overlap and most of the target is never reached.
	// These do not depend on the primitive, so they are computed once.

	uint x = gid.x * 16 + tid.x * 2;
	uint y = gid.y * 8 + tid.y;

	uint fa = FrameRowOffset[y] + FrameColOffset[x];
	uint za = ZBufRowOffset[y] + ZBufColOffset[x];

	// #if GS_PRIM == 2 (triangle)

	for(uint i = 0; i < count; i += 3)
	{
		Vertex v0 = VertexBuffer[IndexBuffer[i + 0]];
		Vertex v1 = VertexBuffer[IndexBuffer[i + 1]];
		Vertex v2 = VertexBuffer[IndexBuffer[i + 2]];

		// TODO: quickly reject if x, y is outside the triangle
		// TODO: calculate interpolated values at x, y
		// TODO: run the GS pipeline
		// TODO: repeat for x+1, y
		// TODO: output two pixels (might be better to process a single pixel, more threads, if there is no 16-bit target involved)

		// testing...

		// Load4 reads four consecutive 32-bit values (16 bytes) starting at byte address fa

		uint4 c = VideoMemory.Load4(fa);

		// v0.c is replicated to a uint4 and shifted per component, giving the four 8-bit channels

		c = (v0.c >> uint4(0, 8, 16, 24)) & 0xff;

		// Store4 writes four 32-bit values (16 bytes), the channels are NOT packed back into one uint

		VideoMemory.Store4(fa, c);
	}

	// #endif
}
|
||||
|
||||
// TODO: DrawPoint (this is going to be a waste of resources)
|
||||
// TODO: DrawLine (line hit-test, will it work?)
|
||||
// TODO: DrawSprite (similar to DrawTriangle)
|
||||
// TODO: if read-backs are too slow, implement GSState::Write/FlushWrite/Read/clut.Write in a compute shader
|
||||
// TODO: unswizzle pages from VideoMemory to the texture cache (if they are marked as valid, otherwise upload from GSLocalMemory::m_vm8)
|
|
@ -81,12 +81,13 @@
|
|||
#define IDR_INTERLACE_FX 10003
|
||||
#define IDD_CONFIG2 10004
|
||||
#define IDR_FXAA_FX 10005
|
||||
#define IDR_CS_FX 10006
|
||||
|
||||
// Next default values for new objects
|
||||
//
|
||||
#ifdef APSTUDIO_INVOKED
|
||||
#ifndef APSTUDIO_READONLY_SYMBOLS
|
||||
#define _APS_NEXT_RESOURCE_VALUE 10006
|
||||
#define _APS_NEXT_RESOURCE_VALUE 10007
|
||||
#define _APS_NEXT_COMMAND_VALUE 32771
|
||||
#define _APS_NEXT_CONTROL_VALUE 2050
|
||||
#define _APS_NEXT_SYMED_VALUE 5000
|
||||
|
|
Loading…
Reference in New Issue