Merge pull request #1812 from phire/real_zfreeze

Add proper zfreeze support.
This commit is contained in:
Tony Wasserka 2015-01-24 13:29:57 +01:00
commit 43036af944
20 changed files with 346 additions and 123 deletions

View File

@ -64,7 +64,7 @@ static Common::Event g_compressAndDumpStateSyncEvent;
static std::thread g_save_thread;
// Don't forget to increase this after doing changes on the savestate system
static const u32 STATE_VERSION = 38;
static const u32 STATE_VERSION = 39;
enum
{

View File

@ -59,7 +59,7 @@ DXGI_FORMAT VarToD3D(VarType t, int size, bool integer)
void D3DVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl)
{
vertex_stride = _vtx_decl.stride;
vtx_decl = _vtx_decl;
memset(m_elems, 0, sizeof(m_elems));
const AttributeFormat* format = &_vtx_decl.position;

View File

@ -33,6 +33,7 @@
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
@ -231,6 +232,7 @@ Renderer::Renderer(void *&window_handle)
s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0;
s_last_xfb_mode = g_ActiveConfig.bUseRealXFB;
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
PixelShaderManager::SetEfbScaleChanged();
SetupDeviceObjects();
@ -946,6 +948,8 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co
s_last_stereo_mode = g_ActiveConfig.iStereoMode > 0;
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
PixelShaderManager::SetEfbScaleChanged();
D3D::context->OMSetRenderTargets(1, &D3D::GetBackBuffer()->GetRTV(), nullptr);
delete g_framebuffer_manager;

View File

@ -58,7 +58,7 @@ static void SetPointer(u32 attrib, u32 stride, const AttributeFormat &format)
void GLVertexFormat::Initialize(const PortableVertexDeclaration &_vtx_decl)
{
this->vtx_decl = _vtx_decl;
vertex_stride = vtx_decl.stride;
u32 vertex_stride = _vtx_decl.stride;
// We will not allow vertex components causing uneven strides.
if (vertex_stride & 3)

View File

@ -43,6 +43,7 @@
#include "VideoCommon/ImageWrite.h"
#include "VideoCommon/OnScreenDisplay.h"
#include "VideoCommon/PixelEngine.h"
#include "VideoCommon/PixelShaderManager.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexShaderGen.h"
@ -618,6 +619,8 @@ Renderer::Renderer()
s_last_efb_scale = g_ActiveConfig.iEFBScale;
CalculateTargetSize(s_backbuffer_width, s_backbuffer_height);
PixelShaderManager::SetEfbScaleChanged();
// Because of the fixed framebuffer size we need to disable the resolution
// options while running
g_Config.bRunning = true;
@ -1681,6 +1684,8 @@ void Renderer::SwapImpl(u32 xfbAddr, u32 fbWidth, u32 fbStride, u32 fbHeight, co
delete g_framebuffer_manager;
g_framebuffer_manager = new FramebufferManager(s_target_width, s_target_height,
s_MSAASamples);
PixelShaderManager::SetEfbScaleChanged();
}
}

View File

@ -43,6 +43,8 @@ static size_t s_index_offset;
// Backend vertex manager constructor: calls CreateDeviceObjects() and then sizes
// the CPU-side scratch buffers (CpuVBuffer / CpuIBuffer). ResetBuffer() hands out
// these scratch buffers instead of mapped GPU memory when CullAll is set.
VertexManager::VertexManager()
{
CreateDeviceObjects();
// NOTE(review): the mapped path in ResetBuffer() uses MAXVBUFFERSIZE /
// MAXIBUFFERSIZE while these use MAX_VBUFFER_SIZE / MAX_IBUFFER_SIZE - confirm
// the CPU buffers are at least as large as what the vertex loader may write.
CpuVBuffer.resize(MAX_VBUFFER_SIZE);
// CpuIBuffer holds u16 elements, so this count is in indices, not bytes.
CpuIBuffer.resize(MAX_IBUFFER_SIZE);
}
VertexManager::~VertexManager()
@ -81,14 +83,25 @@ void VertexManager::PrepareDrawBuffers(u32 stride)
void VertexManager::ResetBuffer(u32 stride)
{
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first;
s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE;
s_baseVertex = buffer.second / stride;
if (CullAll)
{
// This buffer isn't getting sent to the GPU. Just allocate it on the cpu.
s_pCurBufferPointer = s_pBaseBufferPointer = CpuVBuffer.data();
s_pEndBufferPointer = s_pBaseBufferPointer + CpuVBuffer.size();
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
IndexGenerator::Start((u16*)buffer.first);
s_index_offset = buffer.second;
IndexGenerator::Start((u16*)CpuIBuffer.data());
}
else
{
auto buffer = s_vertexBuffer->Map(MAXVBUFFERSIZE, stride);
s_pCurBufferPointer = s_pBaseBufferPointer = buffer.first;
s_pEndBufferPointer = buffer.first + MAXVBUFFERSIZE;
s_baseVertex = buffer.second / stride;
buffer = s_indexBuffer->Map(MAXIBUFFERSIZE * sizeof(u16));
IndexGenerator::Start((u16*)buffer.first);
s_index_offset = buffer.second;
}
}
void VertexManager::Draw(u32 stride)

View File

@ -13,8 +13,6 @@ namespace OGL
{
class GLVertexFormat : public NativeVertexFormat
{
PortableVertexDeclaration vtx_decl;
public:
GLVertexFormat();
~GLVertexFormat();
@ -42,10 +40,15 @@ public:
GLuint m_last_vao;
protected:
virtual void ResetBuffer(u32 stride) override;
private:
void Draw(u32 stride);
void vFlush(bool useDstAlpha) override;
void PrepareDrawBuffers(u32 stride);
// Alternative buffers in CPU memory for primitives we are going to discard.
std::vector<u8> CpuVBuffer;
std::vector<u16> CpuIBuffer;
};
}

View File

@ -23,6 +23,8 @@ struct PixelShaderConstants
int4 fogcolor;
int4 fogi;
float4 fogf[2];
float4 zslope;
float4 efbscale;
};
struct VertexShaderConstants

View File

@ -26,7 +26,11 @@ void GeometryShaderManager::Init()
{
memset(&constants, 0, sizeof(constants));
Dirty();
// Initialize any constants which aren't zero when bpmem is zero.
SetViewportChanged();
SetProjectionChanged();
dirty = true;
}
void GeometryShaderManager::Shutdown()
@ -35,12 +39,9 @@ void GeometryShaderManager::Shutdown()
void GeometryShaderManager::Dirty()
{
SetViewportChanged();
SetProjectionChanged();
SetLinePtWidthChanged();
for (int i = 0; i < 8; i++)
SetTexCoordChanged(i);
// This function is called after a savestate is loaded.
// Any constants that can change based on settings should be re-calculated
s_projection_changed = true;
dirty = true;
}
@ -110,9 +111,14 @@ void GeometryShaderManager::SetTexCoordChanged(u8 texmapid)
void GeometryShaderManager::DoState(PointerWrap &p)
{
p.Do(s_projection_changed);
p.Do(s_viewport_changed);
p.Do(constants);
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Reload current state from global GPU state
// Fixup the current state from global GPU state
// NOTE: This requires that all GPU memory has been loaded already.
Dirty();
}

View File

@ -109,7 +109,8 @@ public:
virtual void Initialize(const PortableVertexDeclaration &vtx_decl) = 0;
virtual void SetupVertexPointers() = 0;
u32 GetVertexStride() const { return vertex_stride; }
u32 GetVertexStride() const { return vtx_decl.stride; }
const PortableVertexDeclaration& GetVertexDeclaration() const { return vtx_decl; }
// TODO: move this under private:
u32 m_components; // VB_HAS_X. Bitmask telling what vertex components are present.
@ -118,5 +119,5 @@ protected:
// Let subclasses construct.
NativeVertexFormat() {}
u32 vertex_stride;
PortableVertexDeclaration vtx_decl;
};

View File

@ -144,6 +144,7 @@ template<class T> static inline void WriteTevRegular(T& out, const char* compone
template<class T> static inline void SampleTexture(T& out, const char *texcoords, const char *texswap, int texmap, API_TYPE ApiType);
template<class T> static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType,DSTALPHA_MODE dstAlphaMode, bool per_pixel_depth);
template<class T> static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data);
template<class T> static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType);
template<class T>
static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
@ -228,6 +229,8 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
"\tint4 " I_FOGCOLOR";\n"
"\tint4 " I_FOGI";\n"
"\tfloat4 " I_FOGF"[2];\n"
"\tfloat4 " I_ZSLOPE";\n"
"\tfloat4 " I_EFBSCALE";\n"
"};\n");
if (g_ActiveConfig.bEnablePixelLighting)
@ -268,8 +271,12 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
GenerateVSOutputMembers<T>(out, ApiType);
out.Write("};\n");
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z);
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest()
&& (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED)
// We can't allow early_ztest for zfreeze because depth is overridden per-pixel.
// This means it's impossible for zcomploc to be emulated on a zfrozen polygon.
&& !bpmem.genMode.zfreeze;
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
if (forced_early_z)
{
@ -362,7 +369,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
out.Write("void main(\n");
out.Write(" out float4 ocol0 : SV_Target0,%s%s\n in float4 rawpos : SV_Position,\n",
dstAlphaMode == DSTALPHA_DUAL_SOURCE_BLEND ? "\n out float4 ocol1 : SV_Target1," : "",
per_pixel_depth ? "\n out float depth : SV_Depth," : "");
(per_pixel_depth && bpmem.zmode.testenable) ? "\n out float depth : SV_Depth," : "");
out.Write(" in centroid float4 colors_0 : COLOR0,\n");
out.Write(" in centroid float4 colors_1 : COLOR1\n");
@ -538,10 +545,13 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
uid_data->zfreeze = bpmem.genMode.zfreeze;
// Note: z-textures are not written to depth buffer if early depth test is used
if (per_pixel_depth && bpmem.UseEarlyDepthTest())
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
{
WritePerPixelDepth<T>(out, uid_data, ApiType);
}
// Note: depth texture output is only written to depth buffer if late depth test is used
// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
@ -555,7 +565,9 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
}
if (per_pixel_depth && bpmem.UseLateDepthTest())
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
{
WritePerPixelDepth<T>(out, uid_data, ApiType);
}
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
{
@ -1015,7 +1027,11 @@ static inline void WriteAlphaTest(T& out, pixel_shader_uid_data* uid_data, API_T
// Tests seem to have proven that writing depth even when the alpha test fails is more
// important that a reliable alpha test, so we just force the alpha test to always succeed.
// At least this seems to be less buggy.
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest() && bpmem.zmode.updateenable && !g_ActiveConfig.backend_info.bSupportsEarlyZ;
uid_data->alpha_test_use_zcomploc_hack = bpmem.UseEarlyDepthTest()
&& bpmem.zmode.updateenable
&& !g_ActiveConfig.backend_info.bSupportsEarlyZ
&& !bpmem.genMode.zfreeze;
if (!uid_data->alpha_test_use_zcomploc_hack)
{
out.Write("\t\tdiscard;\n");
@ -1095,6 +1111,29 @@ static inline void WriteFog(T& out, pixel_shader_uid_data* uid_data)
out.Write("\tprev.rgb = (prev.rgb * (256 - ifog) + " I_FOGCOLOR".rgb * ifog) >> 8;\n");
}
// Emits the shader statement that assigns the per-pixel `depth` output.
//   out      - shader code / uid writer the statements are appended to
//   uid_data - unused here; kept so the signature matches the other Write* helpers
//   ApiType  - selects the OpenGL-specific vertical flip of the screen position
template<class T>
static inline void WritePerPixelDepth(T& out, pixel_shader_uid_data* uid_data, API_TYPE ApiType)
{
	// Regular path: depth is taken straight from zCoord.
	if (!bpmem.genMode.zfreeze)
	{
		out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
		return;
	}

	// zfreeze path: depth is evaluated from the frozen depth plane
	// (I_ZSLOPE) at the pixel's position, scaled by I_EFBSCALE.
	out.SetConstantsUsed(C_ZSLOPE, C_ZSLOPE);
	out.SetConstantsUsed(C_EFBSCALE, C_EFBSCALE);

	out.Write("\tfloat2 screenpos = rawpos.xy * " I_EFBSCALE".xy;\n");

	// OpenGL has reversed vertical screenspace coordinates
	const bool flip_vertical = (ApiType == API_OPENGL);
	if (flip_vertical)
		out.Write("\tscreenpos.y = %i - screenpos.y;\n", EFB_HEIGHT);

	out.Write("\tdepth = float(" I_ZSLOPE".z + " I_ZSLOPE".x * screenpos.x + " I_ZSLOPE".y * screenpos.y) / float(0xFFFFFF);\n");
}
void GetPixelShaderUid(PixelShaderUid& object, DSTALPHA_MODE dstAlphaMode, API_TYPE ApiType, u32 components)
{
GeneratePixelShader<PixelShaderUid>(object, dstAlphaMode, ApiType, components);

View File

@ -21,8 +21,10 @@
#define C_FOGCOLOR (C_INDTEXMTX + 6) //27
#define C_FOGI (C_FOGCOLOR + 1) //28
#define C_FOGF (C_FOGI + 1) //29
#define C_ZSLOPE (C_FOGF + 2) //31
#define C_EFBSCALE (C_ZSLOPE + 1) //32
#define C_PENVCONST_END (C_FOGF + 2)
#define C_PENVCONST_END (C_EFBSCALE + 1)
// Different ways to achieve rendering with destination alpha
enum DSTALPHA_MODE
@ -63,6 +65,10 @@ struct pixel_shader_uid_data
u32 early_ztest : 1;
u32 bounding_box : 1;
// TODO: 31 bits of padding is a waste. Can we free up some bits elsewhere?
u32 zfreeze : 1;
u32 pad : 31;
u32 texMtxInfo_n_projection : 8; // 8x1 bit
u32 tevindref_bi0 : 3;
u32 tevindref_bc0 : 3;

View File

@ -15,41 +15,18 @@
bool PixelShaderManager::s_bFogRangeAdjustChanged;
bool PixelShaderManager::s_bViewPortChanged;
std::array<int4,4> PixelShaderManager::s_tev_color;
std::array<int4,4> PixelShaderManager::s_tev_konst_color;
PixelShaderConstants PixelShaderManager::constants;
bool PixelShaderManager::dirty;
void PixelShaderManager::Init()
{
memset(&constants, 0, sizeof(constants));
memset(s_tev_color.data(), 0, sizeof(s_tev_color));
memset(s_tev_konst_color.data(), 0, sizeof(s_tev_konst_color));
Dirty();
}
void PixelShaderManager::Dirty()
{
// Initialize any constants which aren't zero when bpmem is zero.
s_bFogRangeAdjustChanged = true;
s_bViewPortChanged = true;
s_bViewPortChanged = false;
for (unsigned index = 0; index < s_tev_color.size(); ++index)
{
for (int comp = 0; comp < 4; ++comp)
{
SetTevColor(index, comp, s_tev_color[index][comp]);
SetTevKonstColor(index, comp, s_tev_konst_color[index][comp]);
}
}
SetAlpha();
SetDestAlpha();
SetZTextureBias();
SetViewportChanged();
SetIndTexScaleChanged(false);
SetIndTexScaleChanged(true);
SetEfbScaleChanged();
SetIndMatrixChanged(0);
SetIndMatrixChanged(1);
SetIndMatrixChanged(2);
@ -62,8 +39,20 @@ void PixelShaderManager::Dirty()
SetTexCoordChanged(5);
SetTexCoordChanged(6);
SetTexCoordChanged(7);
SetFogColorChanged();
dirty = true;
}
// Re-derives the pixel shader constants that are not fully determined by the
// serialized `constants` blob and flags the constant buffer for re-upload.
void PixelShaderManager::Dirty()
{
// This function is called after a savestate is loaded.
// Any constants that can change based on settings should be re-calculated.
s_bFogRangeAdjustChanged = true;
// The EFB scale is read from the renderer, so refresh it after a state load.
SetEfbScaleChanged();
SetFogParamChanged();
dirty = true;
}
void PixelShaderManager::Shutdown()
@ -117,7 +106,7 @@ void PixelShaderManager::SetConstants()
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
{
auto& c = constants.colors[index];
c[component] = s_tev_color[index][component] = value;
c[component] = value;
dirty = true;
PRIM_LOG("tev color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
@ -126,7 +115,7 @@ void PixelShaderManager::SetTevColor(int index, int component, s32 value)
void PixelShaderManager::SetTevKonstColor(int index, int component, s32 value)
{
auto& c = constants.kcolors[index];
c[component] = s_tev_konst_color[index][component] = value;
c[component] = value;
dirty = true;
PRIM_LOG("tev konst color%d: %d %d %d %d\n", index, c[0], c[1], c[2], c[3]);
@ -168,6 +157,21 @@ void PixelShaderManager::SetViewportChanged()
s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
}
// Refreshes the EFB scale constants from the renderer and marks the constant
// buffer dirty. The stored values are the reciprocals of what the renderer
// reports for a single EFB unit on each axis.
void PixelShaderManager::SetEfbScaleChanged()
{
	const float scaled_unit_x = float(Renderer::EFBToScaledXf(1));
	const float scaled_unit_y = float(Renderer::EFBToScaledYf(1));

	constants.efbscale[0] = 1.0f / scaled_unit_x;
	constants.efbscale[1] = 1.0f / scaled_unit_y;

	dirty = true;
}
// Stores the zfreeze depth plane in the pixel shader constants as
// (dfdx, dfdy, f0) in zslope[0..2] and marks the constant buffer dirty.
// zslope[3] is left untouched.
void PixelShaderManager::SetZSlope(float dfdx, float dfdy, float f0)
{
	const float plane[3] = { dfdx, dfdy, f0 };
	for (int component = 0; component < 3; ++component)
		constants.zslope[component] = plane[component];

	dirty = true;
}
void PixelShaderManager::SetIndTexScaleChanged(bool high)
{
constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
@ -278,12 +282,14 @@ void PixelShaderManager::SetFogRangeAdjustChanged()
void PixelShaderManager::DoState(PointerWrap &p)
{
p.DoArray(s_tev_color);
p.DoArray(s_tev_konst_color);
p.Do(s_bFogRangeAdjustChanged);
p.Do(s_bViewPortChanged);
p.Do(constants);
if (p.GetMode() == PointerWrap::MODE_READ)
{
// Reload current state from global GPU state
// Fixup the current state from global GPU state
// NOTE: This requires that all GPU memory has been loaded already.
Dirty();
}

View File

@ -36,6 +36,8 @@ public:
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetEfbScaleChanged();
static void SetZSlope(float dfdx, float dfdy, float f0);
static void SetIndMatrixChanged(int matrixidx);
static void SetTevKSelChanged(int id);
static void SetZTextureTypeChanged();
@ -50,9 +52,4 @@ public:
static bool s_bFogRangeAdjustChanged;
static bool s_bViewPortChanged;
// These colors aren't available from global BP state,
// hence we keep a copy of them around.
static std::array<int4,4> s_tev_color;
static std::array<int4,4> s_tev_konst_color;
};

View File

@ -291,6 +291,8 @@ static inline void AssignVSOutputMembers(T& object, const char* a, const char* b
#define I_FOGCOLOR "cfogcolor"
#define I_FOGI "cfogi"
#define I_FOGF "cfogf"
#define I_ZSLOPE "czslope"
#define I_EFBSCALE "cefbscale"
#define I_POSNORMALMATRIX "cpnmtx"
#define I_PROJECTION "cproj"

View File

@ -149,19 +149,20 @@ int RunVertices(int vtx_attr_group, int primitive, int count, DataReader src, bo
if ((int)src.size() < size)
return -1;
if (skip_drawing || (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5))
{
// if cull mode is CULL_ALL, ignore triangles and quads
if (skip_drawing)
return size;
}
// If the native vertex format changed, force a flush.
if (loader->m_native_vertex_format != s_current_vtx_fmt)
VertexManager::Flush();
s_current_vtx_fmt = loader->m_native_vertex_format;
// if cull mode is CULL_ALL, tell VertexManager to skip triangles and quads.
// They still need to go through vertex loading, because we need to calculate a zfreeze reference slope.
bool cullall = (bpmem.genMode.cullmode == GenMode::CULL_ALL && primitive < 5);
DataReader dst = VertexManager::PrepareForAdditionalData(primitive, count,
loader->m_native_vtx_decl.stride);
loader->m_native_vtx_decl.stride, cullall);
count = loader->RunVertices(primitive, count, src, dst);

View File

@ -12,6 +12,7 @@
#include "VideoCommon/RenderBase.h"
#include "VideoCommon/Statistics.h"
#include "VideoCommon/TextureCacheBase.h"
#include "VideoCommon/VertexLoaderManager.h"
#include "VideoCommon/VertexManagerBase.h"
#include "VideoCommon/VertexShaderManager.h"
#include "VideoCommon/VideoConfig.h"
@ -25,7 +26,10 @@ u8 *VertexManager::s_pEndBufferPointer;
PrimitiveType VertexManager::current_primitive_type;
Slope VertexManager::ZSlope;
bool VertexManager::IsFlushed;
bool VertexManager::CullAll;
static const PrimitiveType primitive_from_gx[8] = {
PRIMITIVE_TRIANGLES, // GX_DRAW_QUADS
@ -41,6 +45,7 @@ static const PrimitiveType primitive_from_gx[8] = {
// Base vertex manager constructor: resets the shared flush/cull tracking flags.
VertexManager::VertexManager()
{
// Start in the flushed state; PrepareForAdditionalData() checks IsFlushed to
// decide whether a new buffer is needed.
IsFlushed = true;
// No batch is marked as fully culled until PrepareForAdditionalData() says so.
CullAll = false;
}
VertexManager::~VertexManager()
@ -52,7 +57,7 @@ u32 VertexManager::GetRemainingSize()
return (u32)(s_pEndBufferPointer - s_pCurBufferPointer);
}
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride)
DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall)
{
// The SSE vertex loader can write up to 4 bytes past the end
u32 const needed_vertex_bytes = count * stride + 4;
@ -78,6 +83,8 @@ DataReader VertexManager::PrepareForAdditionalData(int primitive, u32 count, u32
"Increase MAXVBUFFERSIZE or we need primitive breaking after all.");
}
CullAll = cullall;
// need to alloc new buffer
if (IsFlushed)
{
@ -189,45 +196,66 @@ void VertexManager::Flush()
(int)bpmem.genMode.numtexgens, (u32)bpmem.dstalpha.enable, (bpmem.alpha_test.hex>>16)&0xff);
#endif
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
for (unsigned int i : usedtextures)
// If the primitive is marked CullAll, all we need to do is update the vertex constants and calculate the zfreeze reference slope.
if (!CullAll)
{
g_renderer->SetSamplerState(i & 3, i >> 2);
const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);
BitSet32 usedtextures;
for (u32 i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevorders[i / 2].getEnable(i & 1))
usedtextures[bpmem.tevorders[i/2].getTexMap(i & 1)] = true;
if (tentry)
if (bpmem.genMode.numindstages > 0)
for (unsigned int i = 0; i < bpmem.genMode.numtevstages + 1u; ++i)
if (bpmem.tevind[i].IsActive() && bpmem.tevind[i].bt < bpmem.genMode.numindstages)
usedtextures[bpmem.tevindref.getTexMap(bpmem.tevind[i].bt)] = true;
for (unsigned int i : usedtextures)
{
// 0s are probably for no manual wrapping needed.
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
g_renderer->SetSamplerState(i & 3, i >> 2);
const TextureCache::TCacheEntryBase* tentry = TextureCache::Load(i);
if (tentry)
{
// 0s are probably for no manual wrapping needed.
PixelShaderManager::SetTexDims(i, tentry->native_width, tentry->native_height, 0, 0);
}
else
ERROR_LOG(VIDEO, "error loading texture");
}
else
ERROR_LOG(VIDEO, "error loading texture");
}
// set global constants
// set global vertex constants
VertexShaderManager::SetConstants();
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
bpmem.dstalpha.enable &&
bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
// Calculate ZSlope for zfreeze
if (!bpmem.genMode.zfreeze)
{
// Must be done after VertexShaderManager::SetConstants()
CalculateZSlope(VertexLoaderManager::GetCurrentVertexFormat());
}
else if (ZSlope.dirty && !CullAll) // or apply any dirty ZSlopes
{
PixelShaderManager::SetZSlope(ZSlope.dfdx, ZSlope.dfdy, ZSlope.f0);
ZSlope.dirty = false;
}
if (PerfQueryBase::ShouldEmulate())
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
g_vertex_manager->vFlush(useDstAlpha);
if (PerfQueryBase::ShouldEmulate())
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
if (!CullAll)
{
// set the rest of the global constants
GeometryShaderManager::SetConstants();
PixelShaderManager::SetConstants();
bool useDstAlpha = !g_ActiveConfig.bDstAlphaPass &&
bpmem.dstalpha.enable &&
bpmem.blendmode.alphaupdate &&
bpmem.zcontrol.pixel_format == PEControl::RGBA6_Z24;
if (PerfQueryBase::ShouldEmulate())
g_perf_query->EnableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
g_vertex_manager->vFlush(useDstAlpha);
if (PerfQueryBase::ShouldEmulate())
g_perf_query->DisableQuery(bpmem.zcontrol.early_ztest ? PQG_ZCOMP_ZCOMPLOC : PQG_ZCOMP);
}
GFX_DEBUGGER_PAUSE_AT(NEXT_FLUSH, true);
@ -235,9 +263,69 @@ void VertexManager::Flush()
ERROR_LOG(VIDEO, "xf.numtexgens (%d) does not match bp.numtexgens (%d). Error in command stream.", xfmem.numTexGen.numTexGens, bpmem.genMode.numtexgens.Value());
IsFlushed = true;
CullAll = false;
}
// Savestate hook: serializes the shared zfreeze depth plane (ZSlope) and then
// forwards to the active vertex manager's vDoState(). The order of these two
// calls defines the savestate layout.
void VertexManager::DoState(PointerWrap& p)
{
p.Do(ZSlope);
g_vertex_manager->vDoState(p);
}
// Computes the screen-space depth plane of the last triangle currently in the
// vertex buffer and stores it in ZSlope (marking it dirty). The plane is
//     depth(x, y) = f0 + dfdx * x + dfdy * y
// and is used by a following flush that has zfreeze enabled.
//
// format: native vertex format of the buffered vertices (provides stride and
//         the position / position-matrix-index offsets).
//
// ZSlope is left unchanged when no usable triangle is available: the batch is
// not made of triangles, fewer than three vertices are buffered, a vertex has
// w == 0 in clip space, or the triangle is degenerate in screen space.
//
// Must be called after VertexShaderManager::SetConstants(), because
// TransformToClipSpace() relies on the up-to-date projection matrix.
void VertexManager::CalculateZSlope(NativeVertexFormat *format)
{
	// Only a triangle defines a depth plane; the last three vertices of a
	// line or point batch do not form one.
	if (current_primitive_type != PRIMITIVE_TRIANGLES)
		return;

	// Bind by reference: the getter returns a const reference and this runs on
	// every flush, so copying the whole declaration would be wasted work.
	const PortableVertexDeclaration& vert_decl = format->GetVertexDeclaration();
	const size_t stride = vert_decl.stride;

	// The loop below reads the three vertices preceding s_pCurBufferPointer;
	// make sure they actually exist so we never read before the buffer start.
	const size_t buffered_bytes = (size_t)(s_pCurBufferPointer - s_pBaseBufferPointer);
	if (stride == 0 || buffered_bytes < stride * 3)
		return;

	float vtx[9];
	float out[12];
	float viewOffset[2] = { xfmem.viewport.xOrig - bpmem.scissorOffset.x * 2,
	                        xfmem.viewport.yOrig - bpmem.scissorOffset.y * 2};

	// Global matrix ID.
	u32 mtxIdx = g_main_cp_state.matrix_index_a.PosNormalMtxIdx;
	size_t posOff = vert_decl.position.offset;
	size_t mtxOff = vert_decl.posmtx.offset;

	// Look up the vertices of the last rendered triangle and software-transform
	// them. This allows us to determine the depth slope, which will be used if
	// zfreeze is enabled in the following flush.
	for (unsigned int i = 0; i < 3; ++i)
	{
		u8* vtx_ptr = s_pCurBufferPointer - stride * (3 - i);
		vtx[0 + i * 3] = ((float*)(vtx_ptr + posOff))[0];
		vtx[1 + i * 3] = ((float*)(vtx_ptr + posOff))[1];
		vtx[2 + i * 3] = ((float*)(vtx_ptr + posOff))[2];

		// If this vertex format has per-vertex position matrix IDs, look it up.
		if (vert_decl.posmtx.enable)
			mtxIdx = *((u32*)(vtx_ptr + mtxOff));

		VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4], mtxIdx);

		// A zero w cannot be projected; bail out instead of storing inf/NaN
		// in ZSlope.
		const float w = out[3 + i * 4];
		if (w == 0.0f)
			return;

		// Transform to screen space
		float inv_w = 1.0f / w;

		out[0 + i * 4] = out[0 + i * 4] * inv_w * xfmem.viewport.wd + viewOffset[0];
		out[1 + i * 4] = out[1 + i * 4] * inv_w * xfmem.viewport.ht + viewOffset[1];
		out[2 + i * 4] = out[2 + i * 4] * inv_w * xfmem.viewport.zRange + xfmem.viewport.farZ;
	}

	// Edge deltas between the three screen-space vertices (x, y and depth).
	float dx31 = out[8] - out[0];
	float dx12 = out[0] - out[4];
	float dy12 = out[1] - out[5];
	float dy31 = out[9] - out[1];

	float DF31 = out[10] - out[2];
	float DF21 = out[6] - out[2];

	// Plane coefficients: a and b are the depth-weighted terms, c depends only
	// on the x/y deltas and is zero when the triangle has no screen-space area.
	float a = DF31 * -dy12 - DF21 * dy31;
	float b = dx31 * DF21 + dx12 * DF31;
	float c = -dx12 * dy31 - dx31 * -dy12;

	// Sometimes we process degenerate triangles. Avoid dividing by zero.
	if (c == 0)
		return;

	ZSlope.dfdx = -a / c;
	ZSlope.dfdy = -b / c;
	// Anchor the plane so that it passes through the first vertex.
	ZSlope.f0 = out[2] - (out[0] * ZSlope.dfdx + out[1] * ZSlope.dfdy);
	ZSlope.dirty = true;
}

View File

@ -4,6 +4,7 @@
#include "Common/CommonFuncs.h"
#include "Common/CommonTypes.h"
#include "VideoCommon/DataReader.h"
#include "VideoCommon/NativeVertexFormat.h"
class NativeVertexFormat;
class PointerWrap;
@ -14,6 +15,14 @@ enum PrimitiveType {
PRIMITIVE_TRIANGLES,
};
// Depth plane used for zfreeze emulation:
//     depth(x, y) = f0 + dfdx * x + dfdy * y
// Filled in by VertexManager::CalculateZSlope() and uploaded to the pixel
// shader through PixelShaderManager::SetZSlope(). The struct is written to
// savestates as-is via p.Do(ZSlope).
struct Slope
{
// Change in depth per unit of screen-space x.
float dfdx;
// Change in depth per unit of screen-space y.
float dfdy;
// Depth at screen position (0, 0).
float f0;
// Set when the plane was recalculated and has not been uploaded yet; cleared
// by VertexManager::Flush() after the upload.
bool dirty;
};
class VertexManager
{
private:
@ -32,7 +41,7 @@ public:
// needs to be virtual for DX11's dtor
virtual ~VertexManager();
static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride);
static DataReader PrepareForAdditionalData(int primitive, u32 count, u32 stride, bool cullall);
static void FlushData(u32 count, u32 stride);
static void Flush();
@ -55,6 +64,11 @@ protected:
static u32 GetRemainingSize();
static u32 GetRemainingIndices(int primitive);
static Slope ZSlope;
static void CalculateZSlope(NativeVertexFormat *format);
static bool CullAll;
private:
static bool IsFlushed;

View File

@ -167,7 +167,21 @@ static void ViewportCorrectionMatrix(Matrix44& result)
void VertexShaderManager::Init()
{
Dirty();
// Initialize state tracking variables
nTransformMatricesChanged[0] = -1;
nTransformMatricesChanged[1] = -1;
nNormalMatricesChanged[0] = -1;
nNormalMatricesChanged[1] = -1;
nPostTransformMatricesChanged[0] = -1;
nPostTransformMatricesChanged[1] = -1;
nLightsChanged[0] = -1;
nLightsChanged[1] = -1;
nMaterialsChanged = BitSet32(0);
bTexMatricesChanged[0] = false;
bTexMatricesChanged[1] = false;
bPosNormalMatrixChanged = false;
bProjectionChanged = true;
bViewportChanged = false;
memset(&xfmem, 0, sizeof(xfmem));
memset(&constants, 0 , sizeof(constants));
@ -178,6 +192,8 @@ void VertexShaderManager::Init()
memset(g_fProjectionMatrix, 0, sizeof(g_fProjectionMatrix));
for (int i = 0; i < 4; ++i)
g_fProjectionMatrix[i*5] = 1.0f;
dirty = true;
}
void VertexShaderManager::Shutdown()
@ -186,26 +202,10 @@ void VertexShaderManager::Shutdown()
void VertexShaderManager::Dirty()
{
nTransformMatricesChanged[0] = 0;
nTransformMatricesChanged[1] = 256;
nNormalMatricesChanged[0] = 0;
nNormalMatricesChanged[1] = 96;
nPostTransformMatricesChanged[0] = 0;
nPostTransformMatricesChanged[1] = 256;
nLightsChanged[0] = 0;
nLightsChanged[1] = 0x80;
bPosNormalMatrixChanged = true;
bTexMatricesChanged[0] = true;
bTexMatricesChanged[1] = true;
// This function is called after a savestate is loaded.
// Any constants that can change based on settings should be re-calculated
bProjectionChanged = true;
nMaterialsChanged = BitSet32::AllTrue(4);
dirty = true;
}
@ -690,6 +690,25 @@ void VertexShaderManager::ResetView()
bProjectionChanged = true;
}
// Software-transforms one model-space position into clip space.
//   data   - 3 floats: X, Y, Z of the vertex
//   out    - receives 4 floats: the clip-space X, Y, Z, W
//   MtxIdx - position matrix index; only the low 6 bits are used
void VertexShaderManager::TransformToClipSpace(const float* data, float *out, u32 MtxIdx)
{
	// Each index step advances by 4 floats within xfmem.posMatrices.
	const float* const model_rows = (const float *)xfmem.posMatrices + (MtxIdx & 0x3f) * 4;

	// We use the projection matrix calculated by VertexShaderManager, because it
	// includes any free look transformations.
	// Make sure VertexShaderManager::SetConstants() has been called first.
	const float* const proj_rows = &g_fProjectionMatrix[0];

	// Model space -> eye space: three rows of four floats, with the fourth
	// element of each row added as the translation term.
	float eye[3];
	for (int row = 0; row < 3; ++row)
	{
		const float* m = model_rows + row * 4;
		eye[row] = data[0] * m[0] + data[1] * m[1] + data[2] * m[2] + m[3];
	}

	// Eye space -> clip space: four rows of four floats.
	for (int row = 0; row < 4; ++row)
	{
		const float* p = proj_rows + row * 4;
		out[row] = eye[0] * p[0] + eye[1] * p[1] + eye[2] * p[2] + p[3];
	}
}
void VertexShaderManager::DoState(PointerWrap &p)
{
p.Do(g_fProjectionMatrix);
@ -698,8 +717,19 @@ void VertexShaderManager::DoState(PointerWrap &p)
p.Do(s_viewInvRotationMatrix);
p.Do(s_fViewTranslationVector);
p.Do(s_fViewRotation);
p.Do(nTransformMatricesChanged);
p.Do(nNormalMatricesChanged);
p.Do(nPostTransformMatricesChanged);
p.Do(nLightsChanged);
p.Do(nMaterialsChanged);
p.Do(bTexMatricesChanged);
p.Do(bPosNormalMatrixChanged);
p.Do(bProjectionChanged);
p.Do(bViewportChanged);
p.Do(constants);
p.Do(dirty);
if (p.GetMode() == PointerWrap::MODE_READ)
{

View File

@ -34,6 +34,12 @@ public:
static void RotateView(float x, float y);
static void ResetView();
// data: 3 floats representing the X, Y and Z vertex model coordinates.
// mtxIdx: index of the position matrix used to transform them.
// out: 4 floats which will be initialized with the corresponding clip space coordinates
// NOTE: g_fProjectionMatrix must be up to date when this is called
// (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
static void TransformToClipSpace(const float* data, float *out, u32 mtxIdx);
static VertexShaderConstants constants;
static bool dirty;
};