Cleanup and refactor of zfreeze port

Based on the feedback from pull request #1767, I have incorporated most
of degasus's suggestions here.

I think we have a real winner here: moving the code into a shared
function in VertexManagerBase has allowed OGL to utilize zfreeze now
:)

Correct use of the vertex pointer has also fixed most of the issues
that JMC47 reported in pull request #1767.  For me, Mario Tennis now
works with no polygon spikes on the characters anymore!  Shadows are
still an issue, and probably are in the other games with shadow
problems as well.  Rebel Strike also seems better, but random skybox
glitches can show up.
This commit is contained in:
NanoByte011 2014-12-26 01:25:24 -07:00 committed by Scott Mansell
parent 937844b9e3
commit 613781c765
9 changed files with 57 additions and 114 deletions

View File

@ -178,49 +178,12 @@ void VertexManager::vFlush(bool useDstAlpha)
}
u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride();
u32 indices = IndexGenerator::GetIndexLen();
PrepareDrawBuffers(stride);
if (!bpmem.genMode.zfreeze && indices >= 3)
if (!bpmem.genMode.zfreeze && IndexGenerator::GetIndexLen() >= 3)
{
float vtx[9];
float out[12];
// Lookup vertices of the last rendered triangle and software-transform them
// This allows us to determine the depth slope, which will be used if zfreeze
// is enabled in the following flush.
for (unsigned int i = 0; i < 3; ++i)
{
const int base_index = GetIndexBuffer()[indices - 3 + i];
u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride];
vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
// viewport offset ignored because we only look at coordinate differences.
out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
}
float dx31 = out[8] - out[0];
float dx12 = out[0] - out[4];
float dy12 = out[1] - out[5];
float dy31 = out[9] - out[1];
float DF31 = out[10] - out[2];
float DF21 = out[6] - out[2];
float a = DF31 * -dy12 - DF21 * dy31;
float b = dx31 * DF21 + dx12 * DF31;
float c = -dx12 * dy31 - dx31 * -dy12;
float slope_dfdx = -a / c;
float slope_dfdy = -b / c;
float slope_f0 = out[2];
PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0);
CalculateZSlope(stride);
}
VertexLoaderManager::GetCurrentVertexFormat()->SetupVertexPointers();

View File

@ -22,7 +22,6 @@ public:
protected:
virtual void ResetBuffer(u32 stride) override;
u16* GetIndexBuffer() { return &LocalIBuffer[0]; }
u8* GetVertexBuffer() { return &LocalVBuffer[0]; }
private:

View File

@ -42,13 +42,6 @@ static size_t s_index_offset;
VertexManager::VertexManager()
{
LocalVBuffer.resize(MAXVBUFFERSIZE);
s_pCurBufferPointer = s_pBaseBufferPointer = &LocalVBuffer[0];
s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();
LocalIBuffer.resize(MAXIBUFFERSIZE);
CreateDeviceObjects();
}
@ -138,7 +131,6 @@ void VertexManager::vFlush(bool useDstAlpha)
{
GLVertexFormat *nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
u32 stride = nativeVertexFmt->GetVertexStride();
u32 indices = IndexGenerator::GetIndexLen();
if (m_last_vao != nativeVertexFmt->VAO)
{
@ -148,45 +140,9 @@ void VertexManager::vFlush(bool useDstAlpha)
PrepareDrawBuffers(stride);
if (!bpmem.genMode.zfreeze && indices >= 3)
if (!bpmem.genMode.zfreeze && IndexGenerator::GetIndexLen() >= 3)
{
float vtx[9];
float out[12];
// Lookup vertices of the last rendered triangle and software-transform them
// This allows us to determine the depth slope, which will be used if zfreeze
// is enabled in the following flush.
for (unsigned int i = 0; i < 3; ++i)
{
const int base_index = GetIndexBuffer()[indices - 3 + i];
u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride];
vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
// viewport offset ignored because we only look at coordinate differences.
out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
}
float dx31 = out[8] - out[0];
float dx12 = out[0] - out[4];
float dy12 = out[1] - out[5];
float dy31 = out[9] - out[1];
float DF31 = out[10] - out[2];
float DF21 = out[6] - out[2];
float a = DF31 * -dy12 - DF21 * dy31;
float b = dx31 * DF21 + dx12 * DF31;
float c = -dx12 * dy31 - dx31 * -dy12;
float slope_dfdx = -a / c;
float slope_dfdy = -b / c;
float slope_f0 = out[2];
PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0);
CalculateZSlope(stride);
}
// Makes sure we can actually do Dual source blending

View File

@ -42,15 +42,11 @@ public:
GLuint m_last_vao;
protected:
virtual void ResetBuffer(u32 stride) override;
u16* GetIndexBuffer() { return &LocalIBuffer[0]; }
u8* GetVertexBuffer() { return &LocalVBuffer[0]; }
private:
void Draw(u32 stride);
void vFlush(bool useDstAlpha) override;
void PrepareDrawBuffers(u32 stride);
std::vector<u8> LocalVBuffer;
std::vector<u16> LocalIBuffer;
};
}

View File

@ -20,8 +20,8 @@
#define C_INDTEXMTX (C_INDTEXSCALE + 2) //21
#define C_FOGCOLOR (C_INDTEXMTX + 6) //27
#define C_FOGI (C_FOGCOLOR + 1) //28
#define C_FOGF (C_FOGI + 1) //29
#define C_ZSLOPE (C_FOGF + 1) //30
#define C_FOGF (C_FOGI + 2) //30
#define C_ZSLOPE (C_FOGF + 1) //31
#define C_PENVCONST_END (C_ZSLOPE + 2)

View File

@ -14,8 +14,6 @@
bool PixelShaderManager::s_bFogRangeAdjustChanged;
bool PixelShaderManager::s_bViewPortChanged;
bool PixelShaderManager::s_bZSlopeChanged;
static float zslope[3];
std::array<int4,4> PixelShaderManager::s_tev_color;
std::array<int4,4> PixelShaderManager::s_tev_konst_color;
@ -50,7 +48,7 @@ void PixelShaderManager::Dirty()
SetDestAlpha();
SetZTextureBias();
SetViewportChanged();
SetZSlopeChanged(0, 0, 1);
SetZSlope(0, 0, 1);
SetIndTexScaleChanged(false);
SetIndTexScaleChanged(true);
SetIndMatrixChanged(0);
@ -115,17 +113,6 @@ void PixelShaderManager::SetConstants()
dirty = true;
s_bViewPortChanged = false;
}
if (s_bZSlopeChanged)
{
constants.zslope[0] = zslope[0];
constants.zslope[1] = zslope[1];
constants.zslope[2] = zslope[2];
constants.zslope[3] = 0;
dirty = true;
s_bZSlopeChanged = false;
}
}
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
@ -182,12 +169,13 @@ void PixelShaderManager::SetViewportChanged()
s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
}
void PixelShaderManager::SetZSlopeChanged(float dfdx, float dfdy, float f0)
void PixelShaderManager::SetZSlope(float dfdx, float dfdy, float f0)
{
zslope[0] = dfdx;
zslope[1] = dfdy;
zslope[2] = f0;
s_bZSlopeChanged = true;
constants.zslope[0] = dfdx;
constants.zslope[1] = dfdy;
constants.zslope[2] = f0;
constants.zslope[3] = 0;
dirty = true;
}
void PixelShaderManager::SetIndTexScaleChanged(bool high)

View File

@ -36,7 +36,7 @@ public:
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
static void SetZTextureBias();
static void SetViewportChanged();
static void SetZSlopeChanged(float dfdx, float dfdy, float f0);
static void SetZSlope(float dfdx, float dfdy, float f0);
static void SetIndMatrixChanged(int matrixidx);
static void SetTevKSelChanged(int id);
static void SetZTextureTypeChanged();
@ -51,7 +51,6 @@ public:
static bool s_bFogRangeAdjustChanged;
static bool s_bViewPortChanged;
static bool s_bZSlopeChanged;
// These colors aren't available from global BP state,
// hence we keep a copy of them around.

View File

@ -241,3 +241,43 @@ void VertexManager::DoState(PointerWrap& p)
{
g_vertex_manager->vDoState(p);
}
// Derives the depth plane of the most recently written triangle and hands it
// to PixelShaderManager::SetZSlope() so it can be used if zfreeze is enabled
// in the following flush.
//
// stride: size in bytes of one vertex in the current vertex buffer.
//
// The three vertex-sized slots directly below s_pCurBufferPointer are read, so
// the caller must only invoke this when at least three vertices have been
// written. The first three floats of each vertex are fed to
// TransformToClipSpace (presumably the object-space position -- confirm
// against the vertex format).
//
// If the projected triangle has zero screen-space area, no slope can be
// derived; the previously set slope is left untouched in that case.
void VertexManager::CalculateZSlope(u32 stride)
{
	float vtx[9];
	float out[12];

	// Lookup vertices of the last rendered triangle and software-transform them
	// This allows us to determine the depth slope, which will be used if zfreeze
	// is enabled in the following flush.
	for (unsigned int i = 0; i < 3; ++i)
	{
		// i = 0, 1, 2 selects the third-, second- and last-written vertex respectively.
		u8* vtx_ptr = s_pCurBufferPointer - stride * (3 - i);
		vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
		vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
		vtx[2 + i * 3] = ((float*)vtx_ptr)[2];

		VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);

		// Perspective divide followed by viewport scaling.
		// viewport offset ignored because we only look at coordinate differences.
		out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
		out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
		out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
	}

	// Edge deltas between the three transformed vertices (x, y and depth).
	float dx31 = out[8] - out[0];
	float dx12 = out[0] - out[4];
	float dy12 = out[1] - out[5];
	float dy31 = out[9] - out[1];

	float DF31 = out[10] - out[2];
	float DF21 = out[6] - out[2];

	// (a, b, c) is the cross product of the two triangle edges, i.e. the
	// normal of the plane through the three vertices in (x, y, depth) space.
	float a = DF31 * -dy12 - DF21 * dy31;
	float b = dx31 * DF21 + dx12 * DF31;
	float c = -dx12 * dy31 - dx31 * -dy12;

	// c is twice the signed screen-space area of the triangle. A degenerate
	// triangle yields c == 0, and dividing by it would push inf/NaN slopes
	// into the pixel shader constants, so keep the previous slope instead.
	if (c == 0.0f)
		return;

	float slope_dfdx = -a / c;
	float slope_dfdy = -b / c;
	float slope_f0 = out[2];

	PixelShaderManager::SetZSlope(slope_dfdx, slope_dfdy, slope_f0);
}

View File

@ -41,6 +41,8 @@ public:
static void DoState(PointerWrap& p);
static void CalculateZSlope(u32 stride);
protected:
virtual void vDoState(PointerWrap& p) { }