GS: Add new shared hw renderer

This commit is contained in:
TellowKrinkle 2021-11-15 17:26:36 -06:00 committed by lightningterror
parent b8984661d9
commit 66bc1bdc77
11 changed files with 2558 additions and 5 deletions

View File

@ -614,6 +614,7 @@ set(pcsx2GSSources
GS/Renderers/Null/GSTextureNull.cpp
GS/Renderers/HW/GSHwHack.cpp
GS/Renderers/HW/GSRendererHW.cpp
GS/Renderers/HW/GSRendererNew.cpp
GS/Renderers/HW/GSTextureCache.cpp
GS/Renderers/SW/GSDrawScanline.cpp
GS/Renderers/SW/GSDrawScanlineCodeGenerator.cpp
@ -681,6 +682,7 @@ set(pcsx2GSHeaders
GS/Renderers/Null/GSRendererNull.h
GS/Renderers/Null/GSTextureNull.h
GS/Renderers/HW/GSRendererHW.h
GS/Renderers/HW/GSRendererNew.h
GS/Renderers/HW/GSTextureCache.h
GS/Renderers/HW/GSVertexHW.h
GS/Renderers/SW/GSDrawScanlineCodeGenerator.h

View File

@ -23,6 +23,7 @@
#include "Renderers/Null/GSDeviceNull.h"
#include "Renderers/OpenGL/GSDeviceOGL.h"
#include "Renderers/OpenGL/GSRendererOGL.h"
#include "Renderers/HW/GSRendererNew.h"
#include "GSLzma.h"
#include "common/pxStreams.h"
@ -218,13 +219,9 @@ int _GSopen(const WindowInfo& wi, const char* title, GSRendererType renderer, in
switch (renderer)
{
default:
#ifdef _WIN32
case GSRendererType::DX1011_HW:
s_gs = (GSRenderer*)new GSRendererDX11();
break;
#endif
case GSRendererType::OGL_HW:
s_gs = (GSRenderer*)new GSRendererOGL();
s_gs = (GSRenderer*)new GSRendererNew();
break;
case GSRendererType::OGL_SW:
s_gs = new GSRendererSW(threads);

View File

@ -134,8 +134,360 @@ struct HWBlend
u16 flags, op, src, dst;
};
/// Description of a single hardware draw, handed to GSDevice::RenderHW().
/// It bundles the textures, geometry, pipeline selectors and constant buffers
/// that the device backends (GSDevice11 / GSDeviceOGL) read to issue the draw.
/// NOTE: the selector structs below are bitfield/union "keys"; the order and
/// width of the bitfields define the key value, so do not reorder them.
struct GSHWDrawConfig
{
/// Primitive topology used for the actual draw call (input assembler).
enum class Topology: u8
{
Point,
Line,
Triangle,
};
/// Primitive class seen by the geometry shader stage (adds Sprite).
enum class GSTopology: u8
{
Point,
Line,
Triangle,
Sprite,
};
/// Geometry shader selector, packed into an 8-bit key.
struct GSSelector
{
union
{
struct
{
GSTopology topology : 2;
bool expand : 1; // Backends enable geometry-shader expansion for the topology when set
bool iip : 1;
};
u8 key;
};
GSSelector(): key(0) {}
GSSelector(u8 k): key(k) {}
};
/// Vertex shader selector, packed into an 8-bit key.
struct VSSelector
{
union
{
struct
{
u8 fst : 1;
u8 tme : 1;
u8 _free : 6;
};
u8 key;
};
VSSelector(): key(0) {}
VSSelector(u8 k): key(k) {}
};
/// Pixel shader selector: two 32-bit words of bitfields aliased by a 64-bit key.
struct PSSelector
{
// Performance note: there are too many shader combinations.
// Frequent toggling between them may hurt performance and, worse,
// could consume a lot of memory.
union
{
struct
{
// *** Word 1
// Format
u32 aem_fmt : 2;
u32 pal_fmt : 2;
u32 dfmt : 2; // 0 → 32-bit, 1 → 24-bit, 2 → 16-bit
u32 depth_fmt : 2; // 0 → None, 1 → 32-bit, 2 → 16-bit, 3 → RGBA
// Alpha extension/Correction
u32 aem : 1;
u32 fba : 1;
// Fog
u32 fog : 1;
// Flat/Gouraud shading
u32 iip : 1;
// Pixel test
u32 date : 3;
u32 atst : 3;
// Color sampling
u32 fst : 1; // Investigate to do it on the VS
u32 tfx : 3;
u32 tcc : 1;
u32 wms : 2;
u32 wmt : 2;
u32 ltf : 1;
// Shuffle and fbmask effect
u32 shuffle : 1;
u32 read_ba : 1;
u32 write_rg : 1;
u32 fbmask : 1;
// Word 1 is full: the fields above add up to exactly 32 bits.
//u32 _free1:0;
// *** Word 2
// Blend and Colclip
u32 blend_a : 2;
u32 blend_b : 2;
u32 blend_c : 2;
u32 blend_d : 2;
u32 clr1 : 1; // useful?
u32 hdr : 1;
u32 colclip : 1;
u32 pabe : 1;
// Other ways to fetch the texture
u32 channel : 3;
// Dithering
u32 dither : 2;
// Depth clamp
u32 zclamp : 1;
// Hack
u32 tcoffsethack : 1;
u32 urban_chaos_hle : 1;
u32 tales_of_abyss_hle : 1;
u32 tex_is_fb : 1; // Jak Shadows
u32 automatic_lod : 1;
u32 manual_lod : 1;
u32 point_sampler : 1;
u32 invalid_tex0 : 1; // Lupin the 3rd
u32 _free2 : 6;
};
u64 key;
};
PSSelector(): key(0) {}
};
/// Texture sampler selector, packed into an 8-bit key.
struct SamplerSelector
{
union
{
struct
{
u8 tau : 1;
u8 tav : 1;
u8 biln : 1;
u8 triln : 3;
u8 aniso : 1;
u8 _free : 1;
};
u8 key;
};
SamplerSelector(): key(0) {}
SamplerSelector(u32 k): key(k) {}
// All-zero key.
static SamplerSelector Point() { return SamplerSelector(); }
// Zero key with only the biln bit set.
static SamplerSelector Linear()
{
SamplerSelector out;
out.biln = 1;
return out;
}
};
/// Depth/stencil selector, packed into an 8-bit key.
struct DepthStencilSelector
{
union
{
struct
{
u8 ztst : 2;
u8 zwe : 1;
u8 date : 1;
u8 date_one : 1;
u8 _free : 3;
};
u8 key;
};
DepthStencilSelector(): key(0) {}
DepthStencilSelector(u32 k): key(k) {}
// Depth test set to ZTST_ALWAYS; zwe and the date bits stay 0.
static DepthStencilSelector NoDepth()
{
DepthStencilSelector out;
out.ztst = ZTST_ALWAYS;
return out;
}
};
/// Per-channel colour write mask; wrgba aliases the four wr/wg/wb/wa bits.
struct ColorMaskSelector
{
union
{
struct
{
u8 wr : 1;
u8 wg : 1;
u8 wb : 1;
u8 wa : 1;
u8 _free : 4;
};
struct
{
u8 wrgba : 4;
};
u8 key;
};
// Default: all four write bits set.
ColorMaskSelector(): key(0xF) {}
// Only the low four bits of c are kept (wrgba is 4 bits wide).
ColorMaskSelector(u32 c): key(0) { wrgba = c; }
};
/// Vertex-stage constants; each backend converts this into its own layout.
struct VSConstantBuffer
{
GSVector2 vertex_scale;
GSVector2 vertex_offset;
GSVector2 texture_scale;
GSVector2 texture_offset;
GSVector2 point_size;
GSVector2i max_depth;
};
/// Pixel-stage constants in compact form. Each group of byte/short fields is
/// aliased by an integer so it can be loaded in one go; the backends expand
/// these into their own constant buffer layouts.
struct PSConstantBuffer
{
union
{
struct
{
u8 fog_color[3];
u8 aref;
};
u32 fog_color_aref;
};
union
{
struct
{
u8 r, g, b, a;
} fbmask;
u32 fbmask_int;
};
u32 max_depth;
union
{
struct
{
u8 ta0;
u8 ta1;
u8 _pad;
u8 alpha_fix;
};
u32 ta_af;
};
union
{
struct
{
u8 blue_mask;
u8 blue_shift;
u8 green_mask;
u8 green_shift;
} channel_shuffle;
u32 channel_shuffle_int;
};
union
{
struct
{
u16 umsk;
u16 vmsk;
u16 ufix;
u16 vfix;
};
u64 uv_msk_fix;
};
GIFRegDIMX dither_matrix;
GSVector2 tc_offset;
GSVector4 texture_size; // xy → PS2 size, wz → emulator size
GSVector4 half_texel;
GSVector4 uv_min_max;
};
/// Blend configuration, packed into a 32-bit key.
struct BlendState
{
union
{
struct
{
u8 index;
u8 factor;
bool is_constant : 1;
bool is_accumulation : 1;
bool is_mixed_hw_sw : 1;
};
u32 key;
};
BlendState(): key(0) {}
// key is zeroed first so the unused bits of the key are deterministic.
BlendState(u8 index, u8 factor, bool is_constant, bool is_accumulation, bool is_mixed_hw_sw)
: key(0)
{
this->index = index;
this->factor = factor;
this->is_constant = is_constant;
this->is_accumulation = is_accumulation;
this->is_mixed_hw_sw = is_mixed_hw_sw;
}
};
enum class DestinationAlphaMode : u8
{
Off, ///< No destination alpha test
Stencil, ///< Emulate using read-only stencil
StencilOne, ///< Emulate using read-write stencil (first write wins)
PrimIDTracking, ///< Emulate by tracking the primitive ID of the last pixel allowed through
Full, ///< Full emulation (using barriers / ROV)
};
GSTexture* rt; ///< Render target
GSTexture* ds; ///< Depth stencil
GSTexture* tex; ///< Source texture
GSTexture* pal; ///< Palette texture
GSTexture* raw_tex; ///< Used by channel shuffles
GSVertex* verts; ///< Vertices to draw
u32* indices; ///< Indices to draw
u32 nverts; ///< Number of vertices
u32 nindices; ///< Number of indices
u32 indices_per_prim; ///< Number of indices that make up one primitive
const std::vector<size_t>* drawlist; ///< For reducing barriers on sprites
GSVector4i scissor; ///< Scissor rect
Topology topology; ///< Draw topology
GSSelector gs;
VSSelector vs;
PSSelector ps;
BlendState blend;
SamplerSelector sampler;
ColorMaskSelector colormask;
DepthStencilSelector depth;
bool require_one_barrier; ///< Require texture barrier before draw (also used to request an rt copy if texture barrier isn't supported)
bool require_full_barrier; ///< Require texture barrier between all prims
DestinationAlphaMode destination_alpha;
bool datm;
VSConstantBuffer cb_vs;
PSConstantBuffer cb_ps;
/// Optional second draw of the same geometry with a different pixel shader,
/// constants, colour mask and depth state (blend state is shared with the first pass).
struct AlphaSecondPass
{
bool enable;
PSSelector ps;
PSConstantBuffer cb_ps;
ColorMaskSelector colormask;
DepthStencilSelector depth;
} alpha_second_pass;
};
class GSDevice : public GSAlignedClass<32>
{
public:
/// Capability flags that a device backend fills in from its constructor.
struct FeatureSupport
{
bool broken_point_sampler : 1; ///< Issue with AMD cards, see tfx shader for details
bool geometry_shader : 1; ///< Supports geometry shader
bool image_load_store : 1; ///< Supports atomic min and max on images (for use with prim tracking destination alpha algorithm)
bool texture_barrier : 1; ///< Supports sampling rt and hopefully texture barrier
// Start with every feature reported as unsupported; the memset also clears
// the unused bits of the underlying storage.
FeatureSupport()
{
memset(this, 0, sizeof(*this));
}
};
private:
FastList<GSTexture*> m_pool;
static std::array<HWBlend, 3*3*3*3 + 1> m_blendMap;
@ -174,6 +526,7 @@ protected:
} m_index;
unsigned int m_frame; // for ageing the pool
bool m_linear_present;
FeatureSupport m_features;
virtual GSTexture* CreateSurface(GSTexture::Type type, int w, int h, GSTexture::Format format) = 0;
virtual GSTexture* FetchSurface(GSTexture::Type type, int w, int h, GSTexture::Format format);
@ -245,6 +598,9 @@ public:
void StretchRect(GSTexture* sTex, GSTexture* dTex, const GSVector4& dRect, ShaderConvert shader = ShaderConvert::COPY, bool linear = true);
/// Executes the hardware draw described by config. The base implementation
/// does nothing; GSDevice11 and GSDeviceOGL provide the real implementations.
virtual void RenderHW(GSHWDrawConfig& config) {}
/// Returns a copy of the capability flags reported by this device.
/// Const-qualified so it can also be queried through a const GSDevice.
FeatureSupport Features() const { return m_features; }
GSTexture* GetCurrent();
void Merge(GSTexture* sTex[3], GSVector4* sRect, GSVector4* dRect, const GSVector2i& fs, const GSRegPMODE& PMODE, const GSRegEXTBUF& EXTBUF, const GSVector4& c);

View File

@ -41,6 +41,11 @@ GSDevice11::GSDevice11()
m_aniso_filter = aniso_level;
else
m_aniso_filter = 0;
m_features.broken_point_sampler = true; // Not technically the case but the most common reason to use DX11 is because you're on AMD
m_features.geometry_shader = true;
m_features.image_load_store = false;
m_features.texture_barrier = false;
}
bool GSDevice11::SetFeatureLevel(D3D_FEATURE_LEVEL level, bool compat_mode)
@ -1472,6 +1477,255 @@ void GSDevice11::CompileShader(const std::string& source, const char* fn, ID3DIn
throw GSRecoverableError();
}
/// Builds the D3D11 vertex-shader constant buffer from the generic one.
/// The Y components of scale and offset are negated, and the constant
/// 2^-32 is stored in VertexScale.z.
static GSDevice11::VSConstantBuffer convertCB(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	const float two_pow_minus_32 = ldexpf(1, -32);
	const GSVector4 tex_scale = GSVector4::loadl(&cb.texture_scale);
	const GSVector4 tex_offset = GSVector4::loadl(&cb.texture_offset);

	GSDevice11::VSConstantBuffer dx_cb;
	dx_cb.VertexScale = GSVector4(cb.vertex_scale.x, -cb.vertex_scale.y, two_pow_minus_32, 0.0f);
	dx_cb.VertexOffset = GSVector4(cb.vertex_offset.x, -cb.vertex_offset.y, 0.0f, -1.0f);
	// Scale in the low half, offset in the high half.
	dx_cb.Texture_Scale_Offset = tex_scale.upld(tex_offset);
	dx_cb.MaxDepth = cb.max_depth;
	return dx_cb;
}
/// Builds the D3D11 geometry-shader constant buffer; only the point size is
/// taken from the generic vertex constants.
static GSDevice11::GSConstantBuffer convertCBGS(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	GSDevice11::GSConstantBuffer gs_cb;
	gs_cb.PointSize = cb.point_size;
	return gs_cb;
}
/// Expands the compact generic pixel-shader constants into the D3D11 layout.
///
/// @param cb   Generic pixel-shader constants.
/// @param atst Alpha test mode of the pixel shader selector; for modes 1 and 2
///             the alpha reference is lowered by 0.1.
/// @return The filled D3D11 pixel-shader constant buffer.
static GSDevice11::PSConstantBuffer convertCB(const GSHWDrawConfig::PSConstantBuffer& cb, int atst)
{
	GSDevice11::PSConstantBuffer out;

	out.FogColor_AREF = GSVector4(GSVector4i::load(cb.fog_color_aref).u8to32());
	if (atst == 1 || atst == 2) // Greater / Less alpha
		out.FogColor_AREF.w -= 0.1f;

	out.HalfTexel = cb.half_texel;
	out.WH = cb.texture_size;
	out.MinMax = cb.uv_min_max;

	// MskFix has to be filled in before MinF_TA, which is derived from it.
	// (Previously MinF_TA was computed first and read a not-yet-assigned MskFix.)
	out.MskFix = GSVector4i::loadl(&cb.uv_msk_fix).u16to32();

	// ta_af = (ta0, ta1, pad, alpha_fix) widened to floats.
	const GSVector4 ta_af(GSVector4i::load(cb.ta_af).u8to32());
	// xy: (MskFix.xy + 0.5) / texture size, zw: ta0/255, ta1/255.
	out.MinF_TA = (GSVector4(out.MskFix) + 0.5f).xyxy(ta_af) / out.WH.xyxy(GSVector4(255, 255));

	out.ChannelShuffle = GSVector4i::load(cb.channel_shuffle_int).u8to32();
	out.FbMask = GSVector4i::load(cb.fbmask_int).u8to32();
	out.TC_OffsetHack = GSVector4(cb.tc_offset.x, cb.tc_offset.y).xyxy();
	// alpha_fix / 128 and max_depth * 2^-32.
	out.Af_MaxDepth = GSVector4(ta_af.a / 128.f, cb.max_depth * ldexpf(1, -32));

	// Unpack the dither matrix: every source byte is widened to a 16-bit lane,
	// then bits 0-2 and bits 4-6 of each lane are sign-extended separately and
	// recombined as the low and high byte of that lane.
	GSVector4i dither = GSVector4i::loadl(&cb.dither_matrix).u8to16();
	const GSVector4i ditherLow = dither.sll16(13).sra16(13);
	const GSVector4i ditherHi = dither.sll16(9).sra16(5);
	dither = ditherLow.blend8(ditherHi, GSVector4i(0xFF00FF00));

	out.DitherMatrix[0] = GSVector4(dither.xxxx().i8to32());
	out.DitherMatrix[1] = GSVector4(dither.yyyy().i8to32());
	out.DitherMatrix[2] = GSVector4(dither.zzzz().i8to32());
	out.DitherMatrix[3] = GSVector4(dither.wwww().i8to32());
	return out;
}
/// Translates the generic depth/stencil selector into the D3D11 one,
/// field by field.
static GSDevice11::OMDepthStencilSelector convertSel(GSHWDrawConfig::DepthStencilSelector sel)
{
	GSDevice11::OMDepthStencilSelector dss;
	dss.ztst = sel.ztst;
	dss.zwe = sel.zwe;
	dss.date = sel.date;
	dss.date_one = sel.date_one;
	dss.fba = 0; // No longer seems to be in use?
	return dss;
}
/// Combines the generic colour mask and blend state into the D3D11 blend
/// selector. Blending is flagged as enabled whenever the blend index is non-zero.
static GSDevice11::OMBlendSelector convertSel(GSHWDrawConfig::ColorMaskSelector cm, GSHWDrawConfig::BlendState blend)
{
	GSDevice11::OMBlendSelector bsel;
	bsel.wrgba = cm.wrgba;
	bsel.blend_index = blend.index;
	bsel.abe = (blend.index != 0);
	bsel.accu_blend = blend.is_accumulation;
	bsel.blend_mix = blend.is_mixed_hw_sw;
	return bsel;
}
/// Translates the generic vertex shader selector into the D3D11 one.
static GSDevice11::VSSelector convertSel(GSHWDrawConfig::VSSelector sel)
{
	GSDevice11::VSSelector vssel;
	vssel.fst = sel.fst;
	vssel.tme = sel.tme;
	return vssel;
}
/// Translates the generic pixel shader selector into the D3D11 one.
/// Fields are copied one-to-one, except that the palette format and the
/// aem format are merged into the single fmt field.
static GSDevice11::PSSelector convertSel(GSHWDrawConfig::PSSelector sel)
{
	GSDevice11::PSSelector pssel;

	// Formats: pal_fmt sits above the two aem_fmt bits.
	pssel.fmt = (sel.pal_fmt << 2) | sel.aem_fmt;
	pssel.dfmt = sel.dfmt;
	pssel.depth_fmt = sel.depth_fmt;

	// Alpha handling, fog and alpha test.
	pssel.aem = sel.aem;
	pssel.fba = sel.fba;
	pssel.fog = sel.fog;
	pssel.atst = sel.atst;

	// Colour sampling.
	pssel.fst = sel.fst;
	pssel.tfx = sel.tfx;
	pssel.tcc = sel.tcc;
	pssel.wms = sel.wms;
	pssel.wmt = sel.wmt;
	pssel.ltf = sel.ltf;

	// Shuffle and fbmask effects.
	pssel.shuffle = sel.shuffle;
	pssel.read_ba = sel.read_ba;
	pssel.fbmask = sel.fbmask;

	// Blending and colclip.
	pssel.blend_a = sel.blend_a;
	pssel.blend_b = sel.blend_b;
	pssel.blend_c = sel.blend_c;
	pssel.blend_d = sel.blend_d;
	pssel.clr1 = sel.clr1;
	pssel.hdr = sel.hdr;
	pssel.colclip = sel.colclip;
	pssel.pabe = sel.pabe;

	// Channel fetch, dithering and depth clamp.
	pssel.channel = sel.channel;
	pssel.dither = sel.dither;
	pssel.zclamp = sel.zclamp;

	// Hacks.
	pssel.tcoffsethack = sel.tcoffsethack;
	pssel.urban_chaos_hle = sel.urban_chaos_hle;
	pssel.tales_of_abyss_hle = sel.tales_of_abyss_hle;
	pssel.point_sampler = sel.point_sampler;
	pssel.invalid_tex0 = sel.invalid_tex0;

	return pssel;
}
/// Translates the generic geometry shader selector into the D3D11 one.
/// The primitive class follows the topology; the expand flag selects
/// point/line expansion, and for sprites its absence marks them as CPU sprites.
static GSDevice11::GSSelector convertSel(GSHWDrawConfig::GSSelector sel)
{
	using Topo = GSHWDrawConfig::GSTopology;

	GSDevice11::GSSelector gssel;
	gssel.iip = sel.iip;

	const Topo topo = sel.topology;
	if (topo == Topo::Point)
	{
		gssel.point = sel.expand;
		gssel.prim = GS_POINT_CLASS;
	}
	else if (topo == Topo::Line)
	{
		gssel.line = sel.expand;
		gssel.prim = GS_LINE_CLASS;
	}
	else if (topo == Topo::Triangle)
	{
		gssel.prim = GS_TRIANGLE_CLASS;
	}
	else if (topo == Topo::Sprite)
	{
		gssel.cpu_sprite = !sel.expand;
		gssel.prim = GS_SPRITE_CLASS;
	}
	return gssel;
}
/// Translates the generic sampler selector into the D3D11 one. Only the
/// tau/tav bits and the bilinear flag (stored as ltf) are carried over.
static GSDevice11::PSSamplerSelector convertSel(GSHWDrawConfig::SamplerSelector sel)
{
	GSDevice11::PSSamplerSelector ssel;
	ssel.ltf = sel.biln;
	ssel.tau = sel.tau;
	ssel.tav = sel.tav;
	return ssel;
}
/// Executes the hardware draw described by config on the D3D11 backend.
///
/// Steps, in order: destination-alpha (DATE) setup when requested, optional
/// copy of the render target into a float HDR target, vertex/index upload,
/// resource + pipeline state setup, the main draw, the optional second alpha
/// pass, and finally the resolve of the HDR target back into config.rt.
///
/// @param config Draw description; this backend never expects
///               require_full_barrier to be set.
void GSDevice11::RenderHW(GSHWDrawConfig& config)
{
	ASSERT(!config.require_full_barrier); // We always specify no support so it shouldn't request this

	if (config.destination_alpha != GSHWDrawConfig::DestinationAlphaMode::Off)
	{
		// Full-scissor quad: src is the scissor in normalised texture
		// coordinates, dst the same rectangle in clip space (Y flipped below).
		const GSVector4 src = GSVector4(config.scissor) / GSVector4(config.ds->GetSize()).xyxy();
		const GSVector4 dst = src * 2.0f - 1.0f;

		GSVertexPT1 vertices[] =
		{
			{GSVector4(dst.x, -dst.y, 0.5f, 1.0f), GSVector2(src.x, src.y)},
			{GSVector4(dst.z, -dst.y, 0.5f, 1.0f), GSVector2(src.z, src.y)},
			{GSVector4(dst.x, -dst.w, 0.5f, 1.0f), GSVector2(src.x, src.w)},
			{GSVector4(dst.z, -dst.w, 0.5f, 1.0f), GSVector2(src.z, src.w)},
		};

		SetupDATE(config.rt, config.ds, vertices, config.datm);
	}

	GSTexture* hdr_rt = nullptr;
	if (config.ps.hdr)
	{
		const GSVector2i size = config.rt->GetSize();
		const GSVector4 dRect(config.scissor);
		const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
		hdr_rt = CreateRenderTarget(size.x, size.y, GSTexture::Format::FloatColor);
		hdr_rt->CommitRegion(GSVector2i(config.scissor.z, config.scissor.w));
		// Warning: StretchRect must be called before BeginScene otherwise
		// vertices will be overwritten. Trust me you don't want to do that.
		StretchRect(config.rt, sRect, hdr_rt, dRect, ShaderConvert::COPY, false);
	}

	BeginScene();

	void* ptr = nullptr;
	if (IAMapVertexBuffer(&ptr, sizeof(*config.verts), config.nverts))
	{
		GSVector4i::storent(ptr, config.verts, config.nverts * sizeof(*config.verts));
		IAUnmapVertexBuffer();
	}
	IASetIndexBuffer(config.indices, config.nindices);

	// Initialised so that an unexpected enum value never leaves the local
	// indeterminate (the OpenGL backend likewise starts from 0).
	D3D11_PRIMITIVE_TOPOLOGY topology = D3D11_PRIMITIVE_TOPOLOGY_UNDEFINED;
	switch (config.topology)
	{
		case GSHWDrawConfig::Topology::Point:    topology = D3D11_PRIMITIVE_TOPOLOGY_POINTLIST;    break;
		case GSHWDrawConfig::Topology::Line:     topology = D3D11_PRIMITIVE_TOPOLOGY_LINELIST;     break;
		case GSHWDrawConfig::Topology::Triangle: topology = D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST; break;
	}
	IASetPrimitiveTopology(topology);

	PSSetShaderResources(config.tex, config.pal);
	PSSetShaderResource(4, config.raw_tex);

	if (config.require_one_barrier) // Used as "bind rt" flag when texture barrier is unsupported
	{
		// Bind the RT. This way special effects can use it.
		// Do not always bind the rt when it's not needed,
		// only bind it when effects use it such as fbmask emulation currently
		// because we copy the frame buffer and it is quite slow.
		PSSetShaderResource(3, config.rt);
	}

	const VSConstantBuffer cb_vs = convertCB(config.cb_vs);
	const GSConstantBuffer cb_gs = convertCBGS(config.cb_vs);
	PSConstantBuffer cb_ps = convertCB(config.cb_ps, config.ps.atst);

	SetupOM(convertSel(config.depth), convertSel(config.colormask, config.blend), config.blend.factor);
	SetupVS(convertSel(config.vs), &cb_vs);
	SetupGS(convertSel(config.gs), &cb_gs);
	SetupPS(convertSel(config.ps), &cb_ps, convertSel(config.sampler));

	// When HDR is active the draw goes to the float target instead of config.rt.
	OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);

	DrawIndexedPrimitive();

	if (config.alpha_second_pass.enable)
	{
		// Only rebuild the pixel constants when the raw buffers differ.
		// NOTE(review): convertCB also depends on atst; confirm the two passes
		// never share identical cb_ps while needing a different AREF adjustment.
		if (0 != memcmp(&config.cb_ps, &config.alpha_second_pass.cb_ps, sizeof(config.cb_ps)))
		{
			cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst);
		}

		SetupPS(convertSel(config.alpha_second_pass.ps), &cb_ps, convertSel(config.sampler));
		SetupOM(convertSel(config.alpha_second_pass.depth), convertSel(config.alpha_second_pass.colormask, config.blend), config.blend.factor);

		DrawIndexedPrimitive();
	}

	EndScene();

	if (hdr_rt)
	{
		// Resolve the float target back into the real render target.
		const GSVector2i size = config.rt->GetSize();
		const GSVector4 dRect(config.scissor);
		const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
		StretchRect(hdr_rt, sRect, config.rt, dRect, ShaderConvert::MOD_256, false);
		Recycle(hdr_rt);
	}
}
u16 GSDevice11::ConvertBlendEnum(u16 generic)
{
switch (generic)

View File

@ -587,6 +587,8 @@ public:
void SetupPS(PSSelector sel, const PSConstantBuffer* cb, PSSamplerSelector ssel);
void SetupOM(OMDepthStencilSelector dssel, OMBlendSelector bsel, u8 afix);
void RenderHW(GSHWDrawConfig& config) final;
ID3D11Device* operator->() { return m_dev.get(); }
operator ID3D11Device*() { return m_dev.get(); }
operator ID3D11DeviceContext*() { return m_ctx.get(); }

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
/* PCSX2 - PS2 Emulator for PCs
* Copyright (C) 2002-2021 PCSX2 Dev Team
*
* PCSX2 is free software: you can redistribute it and/or modify it under the terms
* of the GNU Lesser General Public License as published by the Free Software Found-
* ation, either version 3 of the License, or (at your option) any later version.
*
* PCSX2 is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with PCSX2.
* If not, see <http://www.gnu.org/licenses/>.
*/
#pragma once
#include "GS/Renderers/HW/GSRendererHW.h"
#include "GS/Renderers/HW/GSVertexHW.h"
/// Hardware renderer derived from GSRendererHW that keeps a GSHWDrawConfig
/// (m_conf) describing the draw being built.
/// NOTE(review): the implementation file is not visible here; the per-method
/// notes below are inferred from names and signatures only — confirm.
class GSRendererNew final : public GSRendererHW
{
// Result of the primitive overlap analysis (see PrimitiveOverlap()).
enum PRIM_OVERLAP
{
PRIM_OVERLAP_UNKNOW,
PRIM_OVERLAP_YES,
PRIM_OVERLAP_NO
};
private:
PRIM_OVERLAP m_prim_overlap;
// Presumably the storage behind GSHWDrawConfig::drawlist (same element type) — confirm.
std::vector<size_t> m_drawlist;
TriFiltering UserHacks_tri_filter;
// Draw description filled in by the Emulate*/Setup* helpers.
GSHWDrawConfig m_conf;
private:
// Helpers that each set up one aspect of the draw (declared inline; defined in the .cpp).
inline void ResetStates();
inline void SetupIA(const float& sx, const float& sy);
inline void EmulateTextureShuffleAndFbmask();
inline void EmulateChannelShuffle(GSTexture** rt, const GSTextureCache::Source* tex);
inline void EmulateBlending(bool& DATE_GL42, bool& DATE_GL45);
inline void EmulateTextureSampler(const GSTextureCache::Source* tex);
inline void EmulateZbuffer();
inline void EmulateATST(GSHWDrawConfig::PSConstantBuffer& cb, GSHWDrawConfig::PSSelector& ps, bool pass_2);
public:
GSRendererNew();
~GSRendererNew() override {}
bool CreateDevice(GSDevice* dev, const WindowInfo& wi) override;
void DrawPrims(GSTexture* rt, GSTexture* ds, GSTextureCache::Source* tex) override;
PRIM_OVERLAP PrimitiveOverlap();
bool IsDummyTexture() const override;
};

View File

@ -83,6 +83,11 @@ GSDeviceOGL::GSDeviceOGL()
m_debug_gl_call = theApp.GetConfigB("debug_opengl");
m_disable_hw_gl_draw = theApp.GetConfigB("disable_hw_gl_draw");
m_features.broken_point_sampler = GLLoader::vendor_id_amd;
m_features.geometry_shader = GLLoader::found_geometry_shader;
m_features.image_load_store = GLLoader::found_GL_ARB_shader_image_load_store && GLLoader::found_GL_ARB_clear_texture;
m_features.texture_barrier = true;
}
GSDeviceOGL::~GSDeviceOGL()
@ -2012,6 +2017,270 @@ void GSDeviceOGL::SetupOM(OMDepthStencilSelector dssel)
OMSetDepthStencilState(m_om_dss[dssel]);
}
/// Builds the OpenGL vertex-shader constant buffer from the generic one.
/// Scale goes in the low half and offset in the high half of each combined vector.
static GSDeviceOGL::VSConstantBuffer convertCB(const GSHWDrawConfig::VSConstantBuffer& cb)
{
	const GSVector4 vtx_scale = GSVector4::loadl(&cb.vertex_scale);
	const GSVector4 vtx_offset = GSVector4::loadl(&cb.vertex_offset);
	const GSVector4 tex_scale = GSVector4::loadl(&cb.texture_scale);
	const GSVector4 tex_offset = GSVector4::loadl(&cb.texture_offset);

	GSDeviceOGL::VSConstantBuffer gl_cb;
	gl_cb.Vertex_Scale_Offset = vtx_scale.upld(vtx_offset);
	gl_cb.Texture_Scale_Offset = tex_scale.upld(tex_offset);
	gl_cb.PointSize = cb.point_size;
	gl_cb.MaxDepth = cb.max_depth;
	return gl_cb;
}
/// Expands the compact generic pixel-shader constants into the OpenGL layout.
///
/// @param cb   Generic pixel-shader constants.
/// @param atst Alpha test mode of the pixel shader selector; for modes 1 and 2
///             the alpha reference is lowered by 0.1.
static GSDeviceOGL::PSConstantBuffer convertCB(const GSHWDrawConfig::PSConstantBuffer& cb, int atst)
{
	GSDeviceOGL::PSConstantBuffer gl_cb;

	// Fog colour + alpha reference, widened from bytes to floats.
	gl_cb.FogColor_AREF = GSVector4(GSVector4i::load(cb.fog_color_aref).u8to32());
	const bool greater_or_less = (atst == 1 || atst == 2); // Greater / Less alpha
	if (greater_or_less)
		gl_cb.FogColor_AREF.w -= 0.1f;

	// Values passed through unchanged.
	gl_cb.WH = cb.texture_size;
	gl_cb.HalfTexel = cb.half_texel;
	gl_cb.MinMax = cb.uv_min_max;

	// (ta0/255, ta1/255, <overwritten below>, alpha_fix/128), then z = max_depth * 2^-32.
	gl_cb.TA_MaxDepth_Af = GSVector4(GSVector4i::load(cb.ta_af).u8to32()) / GSVector4(255.f, 255.f, 1.f, 128.f);
	gl_cb.TA_MaxDepth_Af.z = cb.max_depth * ldexpf(1, -32);

	gl_cb.MskFix = GSVector4i::loadl(&cb.uv_msk_fix).u16to32();
	gl_cb.FbMask = GSVector4i::load(cb.fbmask_int).u8to32();

	// Texture coordinate offset lives in the high half; the low half is zero.
	gl_cb.TC_OH = GSVector4::zero().upld(GSVector4(cb.tc_offset));

	// Dither matrix: widen each byte to 16 bits, sign-extend bits 0-2 and
	// bits 4-6 of every lane, and merge them into the low / high byte.
	const GSVector4i widened = GSVector4i::loadl(&cb.dither_matrix).u8to16();
	const GSVector4i low_bytes = widened.sll16(13).sra16(13);
	const GSVector4i high_bytes = widened.sll16(9).sra16(5);
	const GSVector4i packed = low_bytes.blend8(high_bytes, GSVector4i(0xFF00FF00));

	gl_cb.DitherMatrix[0] = GSVector4(packed.xxxx().i8to32());
	gl_cb.DitherMatrix[1] = GSVector4(packed.yyyy().i8to32());
	gl_cb.DitherMatrix[2] = GSVector4(packed.zzzz().i8to32());
	gl_cb.DitherMatrix[3] = GSVector4(packed.wwww().i8to32());

	return gl_cb;
}
/// Translates the generic vertex shader selector into the OpenGL one:
/// int_fst is the logical inverse of the generic fst bit.
static GSDeviceOGL::VSSelector convertSel(const GSHWDrawConfig::VSSelector sel)
{
	GSDeviceOGL::VSSelector vssel;
	vssel.int_fst = (sel.fst == 0);
	return vssel;
}
/// Executes the hardware draw described by config on the OpenGL backend.
/// Order of operations: scissor, destination-alpha setup, optional HDR target
/// preparation, geometry upload, resource/state/pipeline setup, optional
/// primitive-ID pre-pass, the main draw(s), optional second alpha pass, and
/// the HDR resolve. Note that config.ps.date (and the second-pass copy) is
/// modified when PrimIDTracking is in use.
void GSDeviceOGL::RenderHW(GSHWDrawConfig& config)
{
// Apply the scissor and keep the cached GL state in sync with it.
glScissor(config.scissor.x, config.scissor.y, config.scissor.width(), config.scissor.height());
GLState::scissor = config.scissor;
// Destination Alpha Setup
switch (config.destination_alpha)
{
case GSHWDrawConfig::DestinationAlphaMode::Off:
case GSHWDrawConfig::DestinationAlphaMode::Full:
break; // No setup
case GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking:
InitPrimDateTexture(config.rt, config.scissor);
break;
case GSHWDrawConfig::DestinationAlphaMode::StencilOne:
ClearStencil(config.ds, 1);
break;
case GSHWDrawConfig::DestinationAlphaMode::Stencil:
{
// Quad covering the scissor: src in normalised texture coordinates,
// dst the same rectangle remapped to [-1, 1].
const GSVector4 src = GSVector4(config.scissor) / GSVector4(config.ds->GetSize()).xyxy();
const GSVector4 dst = src * 2.f - 1.f;
GSVertexPT1 vertices[] =
{
{GSVector4(dst.x, dst.y, 0.0f, 0.0f), GSVector2(src.x, src.y)},
{GSVector4(dst.z, dst.y, 0.0f, 0.0f), GSVector2(src.z, src.y)},
{GSVector4(dst.x, dst.w, 0.0f, 0.0f), GSVector2(src.x, src.w)},
{GSVector4(dst.z, dst.w, 0.0f, 0.0f), GSVector2(src.z, src.w)},
};
SetupDATE(config.rt, config.ds, vertices, config.datm);
}
}
// HDR: draw into a temporary float target that is resolved back at the end.
GSTexture* hdr_rt = nullptr;
if (config.ps.hdr)
{
GSVector2i size = config.rt->GetSize();
hdr_rt = CreateRenderTarget(size.x, size.y, GSTexture::Format::FloatColor);
hdr_rt->CommitRegion(GSVector2i(config.scissor.z, config.scissor.w));
OMSetRenderTargets(hdr_rt, config.ds, &config.scissor);
// save blend state, since BlitRect destroys it
const bool old_blend = GLState::blend;
BlitRect(config.rt, config.scissor, config.rt->GetSize(), false, false);
if (old_blend)
{
GLState::blend = old_blend;
glEnable(GL_BLEND);
}
}
BeginScene();
IASetVertexBuffer(config.verts, config.nverts);
IASetIndexBuffer(config.indices, config.nindices);
// Stays 0 if config.topology holds an unexpected value.
GLenum topology = 0;
switch (config.topology)
{
case GSHWDrawConfig::Topology::Point: topology = GL_POINTS; break;
case GSHWDrawConfig::Topology::Line: topology = GL_LINES; break;
case GSHWDrawConfig::Topology::Triangle: topology = GL_TRIANGLES; break;
}
IASetPrimitiveTopology(topology);
PSSetShaderResources(config.tex, config.pal);
PSSetShaderResource(4, config.raw_tex);
// Always bind the RT. This way special effects can use it.
PSSetShaderResource(3, config.rt);
// The generic selector keys are passed straight into the OpenGL selectors.
SetupSampler(PSSamplerSelector(config.sampler.key));
OMSetBlendState(config.blend.index, config.blend.factor, config.blend.is_constant, config.blend.is_accumulation, config.blend.is_mixed_hw_sw);
OMSetColorMaskState(OMColorMaskSelector(config.colormask.key));
SetupOM(OMDepthStencilSelector(config.depth.key));
VSConstantBuffer cb_vs = convertCB(config.cb_vs);
PSConstantBuffer cb_ps = convertCB(config.cb_ps, config.ps.atst);
SetupCB(&cb_vs, &cb_ps);
// Channel shuffle constants are only uploaded when any of them is non-zero.
if (config.cb_ps.channel_shuffle_int)
{
SetupCBMisc(GSVector4i::load(config.cb_ps.channel_shuffle_int).u8to32());
}
// A geometry shader variant is only selected when expansion is requested;
// triangles are never expected to request it.
GSSelector gssel;
if (config.gs.expand)
{
switch (config.gs.topology)
{
case GSHWDrawConfig::GSTopology::Point: gssel.point = 1; break;
case GSHWDrawConfig::GSTopology::Line: gssel.line = 1; break;
case GSHWDrawConfig::GSTopology::Sprite: gssel.sprite = 1; break;
case GSHWDrawConfig::GSTopology::Triangle: ASSERT(0); break;
}
}
PSSelector pssel;
pssel.key = config.ps.key;
const VSSelector vssel = convertSel(config.vs);
SetupPipeline(vssel, gssel, pssel);
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
{
GL_PUSH("Date GL42");
// It could be a good idea to use the stencil at the same time.
// Early stencil test will reduce the number of atomic-load operations
// Create an r32i image that will contain primitive ID
// Note: do it at the beginning because the clean will dirty the FBO state
//dev->InitPrimDateTexture(rtsize.x, rtsize.y);
// I don't know how legal it is to mount the rt as both Texture and RT. No write is done.
// In doubt let's detach RT.
OMSetRenderTargets(NULL, config.ds, &config.scissor);
// Don't write anything on the color buffer
// Neither in the depth buffer
glDepthMask(false);
// Compute primitiveID max that pass the date test (Draw without barrier)
DrawIndexedPrimitive();
// Ask PS to discard shader above the primitiveID max
glDepthMask(GLState::depth_mask);
// Switch both passes to date mode 3 for the real draw.
pssel.date = 3;
config.ps.date = 3;
config.alpha_second_pass.ps.date = 3;
SetupPipeline(vssel, gssel, pssel);
// Be sure that the first pass is finished!
Barrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
}
// When HDR is active the draw goes to the float target instead of config.rt.
OMSetRenderTargets(hdr_rt ? hdr_rt : config.rt, config.ds, &config.scissor);
SendHWDraw(config);
if (config.alpha_second_pass.enable)
{
// Re-upload the constants only when the second pass uses different ones.
if (0 != memcmp(&config.cb_ps, &config.alpha_second_pass.cb_ps, sizeof(config.cb_ps)))
{
cb_ps = convertCB(config.alpha_second_pass.cb_ps, config.alpha_second_pass.ps.atst);
SetupCB(&cb_vs, &cb_ps);
}
pssel.key = config.alpha_second_pass.ps.key;
SetupPipeline(vssel, gssel, pssel);
OMSetColorMaskState(OMColorMaskSelector(config.alpha_second_pass.colormask.key));
SetupOM(OMDepthStencilSelector(config.alpha_second_pass.depth.key));
SendHWDraw(config);
}
if (config.destination_alpha == GSHWDrawConfig::DestinationAlphaMode::PrimIDTracking)
RecycleDateTexture();
EndScene();
// Warning: EndScene must be called before StretchRect otherwise
// vertices will be overwritten. Trust me you don't want to do that.
if (hdr_rt)
{
GSVector2i size = config.rt->GetSize();
GSVector4 dRect(config.scissor);
const GSVector4 sRect = dRect / GSVector4(size.x, size.y).xyxy();
StretchRect(hdr_rt, sRect, config.rt, dRect, ShaderConvert::MOD_256, false);
Recycle(hdr_rt);
}
}
/// Issues the draw call(s) for config, inserting texture barriers as requested.
///
/// - With a draw list: one barrier + draw per list entry, each entry giving
///   the number of primitives in that sub-draw.
/// - With require_full_barrier: one barrier + draw per primitive.
/// - With require_one_barrier: a single barrier followed by a single draw.
/// - Otherwise: a single draw without any barrier.
void GSDeviceOGL::SendHWDraw(const GSHWDrawConfig& config)
{
	if (config.drawlist)
	{
		GL_PUSH("Split the draw (SPRITE)");
#if defined(_DEBUG)
		// Check how draw call is split.
		std::map<size_t, size_t> frequency;
		for (const auto& run_length : *config.drawlist)
			++frequency[run_length];

		std::string message;
		for (const auto& entry : frequency)
			message += " " + std::to_string(entry.first) + "(" + std::to_string(entry.second) + ")";

		GL_PERF("Split single draw (%d sprites) into %zu draws: consecutive draws(frequency):%s",
			config.nindices / config.indices_per_prim, config.drawlist->size(), message.c_str());
#endif

		// Walk the list, advancing the start index by the size of each sub-draw.
		size_t first_index = 0;
		for (const size_t prim_count : *config.drawlist)
		{
			const size_t index_count = prim_count * config.indices_per_prim;
			glTextureBarrier();
			DrawIndexedPrimitive(first_index, index_count);
			first_index += index_count;
		}
		return;
	}

	if (config.require_full_barrier)
	{
		GL_PUSH("Split the draw");
		GL_PERF("Split single draw in %d draw", config.nindices / config.indices_per_prim);

		// One primitive at a time, each preceded by a barrier.
		size_t first_index = 0;
		while (first_index < config.nindices)
		{
			glTextureBarrier();
			DrawIndexedPrimitive(first_index, config.indices_per_prim);
			first_index += config.indices_per_prim;
		}
		return;
	}

	// Single draw, optionally preceded by one barrier.
	if (config.require_one_barrier)
		glTextureBarrier();
	DrawIndexedPrimitive();
}
// Note: used as a callback of DebugMessageCallback. Don't change the signature
void GSDeviceOGL::DebugOutputToFile(GLenum gl_source, GLenum gl_type, GLuint id, GLenum gl_severity, GLsizei gl_length, const GLchar* gl_message, const void* userParam)
{

View File

@ -614,6 +614,9 @@ public:
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, bool red, bool green, bool blue, bool alpha) final;
void StretchRect(GSTexture* sTex, const GSVector4& sRect, GSTexture* dTex, const GSVector4& dRect, GLuint ps, int bs, OMColorMaskSelector cms, bool linear = true);
void RenderHW(GSHWDrawConfig& config) final;
void SendHWDraw(const GSHWDrawConfig& config);
void SetupDATE(GSTexture* rt, GSTexture* ds, const GSVertexPT1* vertices, bool datm);
void IASetPrimitiveTopology(GLenum topology);

View File

@ -488,6 +488,7 @@
<ClCompile Include="GS\Renderers\Common\GSRenderer.cpp" />
<ClCompile Include="GS\Renderers\DX11\GSRendererDX11.cpp" />
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp" />
<ClCompile Include="GS\Renderers\HW\GSRendererNew.cpp" />
<ClCompile Include="GS\Renderers\Null\GSRendererNull.cpp" />
<ClCompile Include="GS\Renderers\OpenGL\GSRendererOGL.cpp" />
<ClCompile Include="GS\Renderers\SW\GSRendererSW.cpp" />
@ -849,6 +850,7 @@
<ClInclude Include="GS\Renderers\Common\GSRenderer.h" />
<ClInclude Include="GS\Renderers\DX11\GSRendererDX11.h" />
<ClInclude Include="GS\Renderers\HW\GSRendererHW.h" />
<ClInclude Include="GS\Renderers\HW\GSRendererNew.h" />
<ClInclude Include="GS\Renderers\Null\GSRendererNull.h" />
<ClInclude Include="GS\Renderers\OpenGL\GSRendererOGL.h" />
<ClInclude Include="GS\Renderers\SW\GSRendererSW.h" />

View File

@ -1553,6 +1553,9 @@
<ClCompile Include="GS\Renderers\HW\GSRendererHW.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\HW\GSRendererNew.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
<ClCompile Include="GS\Renderers\HW\GSTextureCache.cpp">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClCompile>
@ -2654,6 +2657,9 @@
<ClInclude Include="GS\Renderers\HW\GSRendererHW.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\HW\GSRendererNew.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>
<ClInclude Include="GS\Renderers\HW\GSTextureCache.h">
<Filter>System\Ps2\GS\Renderers\Hardware</Filter>
</ClInclude>