Initial port of zfreeze branch (3.5-1729)
Initial port of the original zfreeze branch (3.5-1729) by neobrain into the most recent build of Dolphin. Makes Rogue Squadron 2 very playable at full speed thanks to recent core speedups made to Dolphin. Works only with the DirectX video plugin for now. Enjoy, and Merry Xmas!
This commit is contained in:
parent
4984215971
commit
937844b9e3
|
@ -178,9 +178,51 @@ void VertexManager::vFlush(bool useDstAlpha)
|
|||
}
|
||||
|
||||
u32 stride = VertexLoaderManager::GetCurrentVertexFormat()->GetVertexStride();
|
||||
u32 indices = IndexGenerator::GetIndexLen();
|
||||
|
||||
PrepareDrawBuffers(stride);
|
||||
|
||||
if (!bpmem.genMode.zfreeze && indices >= 3)
|
||||
{
|
||||
float vtx[9];
|
||||
float out[12];
|
||||
|
||||
// Lookup vertices of the last rendered triangle and software-transform them
|
||||
// This allows us to determine the depth slope, which will be used if zfreeze
|
||||
// is enabled in the following flush.
|
||||
for (unsigned int i = 0; i < 3; ++i)
|
||||
{
|
||||
const int base_index = GetIndexBuffer()[indices - 3 + i];
|
||||
u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride];
|
||||
vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
|
||||
vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
|
||||
vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
|
||||
|
||||
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
|
||||
|
||||
// viewport offset ignored because we only look at coordinate differences.
|
||||
out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
|
||||
out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
|
||||
out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
|
||||
}
|
||||
float dx31 = out[8] - out[0];
|
||||
float dx12 = out[0] - out[4];
|
||||
float dy12 = out[1] - out[5];
|
||||
float dy31 = out[9] - out[1];
|
||||
|
||||
float DF31 = out[10] - out[2];
|
||||
float DF21 = out[6] - out[2];
|
||||
float a = DF31 * -dy12 - DF21 * dy31;
|
||||
float b = dx31 * DF21 + dx12 * DF31;
|
||||
float c = -dx12 * dy31 - dx31 * -dy12;
|
||||
|
||||
float slope_dfdx = -a / c;
|
||||
float slope_dfdy = -b / c;
|
||||
float slope_f0 = out[2];
|
||||
|
||||
PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0);
|
||||
}
|
||||
|
||||
VertexLoaderManager::GetCurrentVertexFormat()->SetupVertexPointers();
|
||||
g_renderer->ApplyState(useDstAlpha);
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ public:
|
|||
protected:
|
||||
virtual void ResetBuffer(u32 stride) override;
|
||||
u16* GetIndexBuffer() { return &LocalIBuffer[0]; }
|
||||
u8* GetVertexBuffer() { return &LocalVBuffer[0]; }
|
||||
|
||||
private:
|
||||
|
||||
|
|
|
@ -42,6 +42,13 @@ static size_t s_index_offset;
|
|||
|
||||
// Sets up the CPU-side staging storage used by this vertex manager:
//  - sizes the local vertex buffer and points the shared base/current/end
//    write cursors at it,
//  - sizes the local index buffer,
//  - then creates the backend device objects.
VertexManager::VertexManager()
{
	// Vertex staging storage; the write cursors below address this memory directly.
	LocalVBuffer.resize(MAXVBUFFERSIZE);

	// Base and current cursor both start at the first byte of the buffer.
	s_pBaseBufferPointer = &LocalVBuffer[0];
	s_pCurBufferPointer = s_pBaseBufferPointer;
	// End cursor sits one past the last byte of the buffer.
	s_pEndBufferPointer = s_pBaseBufferPointer + LocalVBuffer.size();

	// Index staging storage.
	LocalIBuffer.resize(MAXIBUFFERSIZE);

	CreateDeviceObjects();
}
|
||||
|
||||
|
@ -131,6 +138,7 @@ void VertexManager::vFlush(bool useDstAlpha)
|
|||
{
|
||||
GLVertexFormat *nativeVertexFmt = (GLVertexFormat*)VertexLoaderManager::GetCurrentVertexFormat();
|
||||
u32 stride = nativeVertexFmt->GetVertexStride();
|
||||
u32 indices = IndexGenerator::GetIndexLen();
|
||||
|
||||
if (m_last_vao != nativeVertexFmt->VAO)
|
||||
{
|
||||
|
@ -140,6 +148,47 @@ void VertexManager::vFlush(bool useDstAlpha)
|
|||
|
||||
PrepareDrawBuffers(stride);
|
||||
|
||||
if (!bpmem.genMode.zfreeze && indices >= 3)
|
||||
{
|
||||
float vtx[9];
|
||||
float out[12];
|
||||
|
||||
// Lookup vertices of the last rendered triangle and software-transform them
|
||||
// This allows us to determine the depth slope, which will be used if zfreeze
|
||||
// is enabled in the following flush.
|
||||
for (unsigned int i = 0; i < 3; ++i)
|
||||
{
|
||||
const int base_index = GetIndexBuffer()[indices - 3 + i];
|
||||
u8* vtx_ptr = &((u8*)GetVertexBuffer())[base_index * stride];
|
||||
vtx[0 + i * 3] = ((float*)vtx_ptr)[0];
|
||||
vtx[1 + i * 3] = ((float*)vtx_ptr)[1];
|
||||
vtx[2 + i * 3] = ((float*)vtx_ptr)[2];
|
||||
|
||||
VertexShaderManager::TransformToClipSpace(&vtx[i * 3], &out[i * 4]);
|
||||
|
||||
// viewport offset ignored because we only look at coordinate differences.
|
||||
out[0 + i * 4] = out[0 + i * 4] / out[3 + i * 4] * xfmem.viewport.wd;
|
||||
out[1 + i * 4] = out[1 + i * 4] / out[3 + i * 4] * xfmem.viewport.ht;
|
||||
out[2 + i * 4] = out[2 + i * 4] / out[3 + i * 4] * xfmem.viewport.zRange + xfmem.viewport.farZ;
|
||||
}
|
||||
float dx31 = out[8] - out[0];
|
||||
float dx12 = out[0] - out[4];
|
||||
float dy12 = out[1] - out[5];
|
||||
float dy31 = out[9] - out[1];
|
||||
|
||||
float DF31 = out[10] - out[2];
|
||||
float DF21 = out[6] - out[2];
|
||||
float a = DF31 * -dy12 - DF21 * dy31;
|
||||
float b = dx31 * DF21 + dx12 * DF31;
|
||||
float c = -dx12 * dy31 - dx31 * -dy12;
|
||||
|
||||
float slope_dfdx = -a / c;
|
||||
float slope_dfdy = -b / c;
|
||||
float slope_f0 = out[2];
|
||||
|
||||
PixelShaderManager::SetZSlopeChanged(slope_dfdx, slope_dfdy, slope_f0);
|
||||
}
|
||||
|
||||
// Makes sure we can actually do Dual source blending
|
||||
bool dualSourcePossible = g_ActiveConfig.backend_info.bSupportsDualSourceBlend;
|
||||
|
||||
|
|
|
@ -42,10 +42,15 @@ public:
|
|||
GLuint m_last_vao;
|
||||
protected:
|
||||
virtual void ResetBuffer(u32 stride) override;
|
||||
u16* GetIndexBuffer() { return &LocalIBuffer[0]; }
|
||||
u8* GetVertexBuffer() { return &LocalVBuffer[0]; }
|
||||
private:
|
||||
void Draw(u32 stride);
|
||||
void vFlush(bool useDstAlpha) override;
|
||||
void PrepareDrawBuffers(u32 stride);
|
||||
|
||||
std::vector<u8> LocalVBuffer;
|
||||
std::vector<u16> LocalIBuffer;
|
||||
};
|
||||
|
||||
}
|
||||
|
|
|
@ -23,6 +23,7 @@ struct PixelShaderConstants
|
|||
int4 fogcolor;
|
||||
int4 fogi;
|
||||
float4 fogf[2];
|
||||
float4 zslope;
|
||||
};
|
||||
|
||||
struct VertexShaderConstants
|
||||
|
|
|
@ -228,6 +228,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||
"\tint4 " I_FOGCOLOR";\n"
|
||||
"\tint4 " I_FOGI";\n"
|
||||
"\tfloat4 " I_FOGF"[2];\n"
|
||||
"\tfloat4 " I_ZSLOPE";\n"
|
||||
"};\n");
|
||||
|
||||
if (g_ActiveConfig.bEnablePixelLighting)
|
||||
|
@ -269,7 +270,7 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||
out.Write("};\n");
|
||||
|
||||
const bool forced_early_z = g_ActiveConfig.backend_info.bSupportsEarlyZ && bpmem.UseEarlyDepthTest() && (g_ActiveConfig.bFastDepthCalc || bpmem.alpha_test.TestResult() == AlphaTest::UNDETERMINED);
|
||||
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z);
|
||||
const bool per_pixel_depth = (bpmem.ztex2.op != ZTEXTURE_DISABLE && bpmem.UseLateDepthTest()) || (!g_ActiveConfig.bFastDepthCalc && bpmem.zmode.testenable && !forced_early_z) || bpmem.genMode.zfreeze;
|
||||
|
||||
if (forced_early_z)
|
||||
{
|
||||
|
@ -538,10 +539,20 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||
uid_data->fast_depth_calc = g_ActiveConfig.bFastDepthCalc;
|
||||
uid_data->early_ztest = bpmem.UseEarlyDepthTest();
|
||||
uid_data->fog_fsel = bpmem.fog.c_proj_fsel.fsel;
|
||||
uid_data->zfreeze = bpmem.genMode.zfreeze;
|
||||
|
||||
// Note: z-textures are not written to depth buffer if early depth test is used
|
||||
if (per_pixel_depth && bpmem.UseEarlyDepthTest())
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
{
|
||||
if (bpmem.genMode.zfreeze)
|
||||
{
|
||||
out.Write("\tdepth = " I_ZSLOPE".z + " I_ZSLOPE".x * (clipPos.x / clipPos.w) + " I_ZSLOPE".y * (clipPos.y / clipPos.w);\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
}
|
||||
}
|
||||
|
||||
// Note: depth texture output is only written to depth buffer if late depth test is used
|
||||
// theoretical final depth value is used for fog calculation, though, so we have to emulate ztextures anyway
|
||||
|
@ -555,7 +566,16 @@ static inline void GeneratePixelShader(T& out, DSTALPHA_MODE dstAlphaMode, API_T
|
|||
}
|
||||
|
||||
if (per_pixel_depth && bpmem.UseLateDepthTest())
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
{
|
||||
if (bpmem.genMode.zfreeze)
|
||||
{
|
||||
out.Write("\tdepth = " I_ZSLOPE".z + " I_ZSLOPE".x * (clipPos.x / clipPos.w) + " I_ZSLOPE".y * (clipPos.y / clipPos.w);\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
out.Write("\tdepth = float(zCoord) / float(0xFFFFFF);\n");
|
||||
}
|
||||
}
|
||||
|
||||
if (dstAlphaMode == DSTALPHA_ALPHA_PASS)
|
||||
{
|
||||
|
|
|
@ -21,8 +21,9 @@
|
|||
#define C_FOGCOLOR (C_INDTEXMTX + 6) //27
|
||||
#define C_FOGI (C_FOGCOLOR + 1) //28
|
||||
#define C_FOGF (C_FOGI + 1) //29
|
||||
#define C_ZSLOPE (C_FOGF + 1) //30
|
||||
|
||||
#define C_PENVCONST_END (C_FOGF + 2)
|
||||
#define C_PENVCONST_END (C_ZSLOPE + 2)
|
||||
|
||||
// Different ways to achieve rendering with destination alpha
|
||||
enum DSTALPHA_MODE
|
||||
|
@ -62,6 +63,7 @@ struct pixel_shader_uid_data
|
|||
u32 forced_early_z : 1;
|
||||
u32 early_ztest : 1;
|
||||
u32 bounding_box : 1;
|
||||
u32 zfreeze : 1;
|
||||
|
||||
u32 texMtxInfo_n_projection : 8; // 8x1 bit
|
||||
u32 tevindref_bi0 : 3;
|
||||
|
|
|
@ -14,6 +14,8 @@
|
|||
|
||||
bool PixelShaderManager::s_bFogRangeAdjustChanged;
|
||||
bool PixelShaderManager::s_bViewPortChanged;
|
||||
bool PixelShaderManager::s_bZSlopeChanged;
|
||||
static float zslope[3];
|
||||
|
||||
std::array<int4,4> PixelShaderManager::s_tev_color;
|
||||
std::array<int4,4> PixelShaderManager::s_tev_konst_color;
|
||||
|
@ -48,6 +50,7 @@ void PixelShaderManager::Dirty()
|
|||
SetDestAlpha();
|
||||
SetZTextureBias();
|
||||
SetViewportChanged();
|
||||
SetZSlopeChanged(0, 0, 1);
|
||||
SetIndTexScaleChanged(false);
|
||||
SetIndTexScaleChanged(true);
|
||||
SetIndMatrixChanged(0);
|
||||
|
@ -112,6 +115,17 @@ void PixelShaderManager::SetConstants()
|
|||
dirty = true;
|
||||
s_bViewPortChanged = false;
|
||||
}
|
||||
|
||||
if (s_bZSlopeChanged)
|
||||
{
|
||||
constants.zslope[0] = zslope[0];
|
||||
constants.zslope[1] = zslope[1];
|
||||
constants.zslope[2] = zslope[2];
|
||||
constants.zslope[3] = 0;
|
||||
|
||||
dirty = true;
|
||||
s_bZSlopeChanged = false;
|
||||
}
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetTevColor(int index, int component, s32 value)
|
||||
|
@ -168,6 +182,14 @@ void PixelShaderManager::SetViewportChanged()
|
|||
s_bFogRangeAdjustChanged = true; // TODO: Shouldn't be necessary with an accurate fog range adjust implementation
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetZSlopeChanged(float dfdx, float dfdy, float f0)
|
||||
{
|
||||
zslope[0] = dfdx;
|
||||
zslope[1] = dfdy;
|
||||
zslope[2] = f0;
|
||||
s_bZSlopeChanged = true;
|
||||
}
|
||||
|
||||
void PixelShaderManager::SetIndTexScaleChanged(bool high)
|
||||
{
|
||||
constants.indtexscale[high][0] = bpmem.texscale[high].ss0;
|
||||
|
|
|
@ -36,6 +36,7 @@ public:
|
|||
static void SetTexDims(int texmapid, u32 width, u32 height, u32 wraps, u32 wrapt);
|
||||
static void SetZTextureBias();
|
||||
static void SetViewportChanged();
|
||||
static void SetZSlopeChanged(float dfdx, float dfdy, float f0);
|
||||
static void SetIndMatrixChanged(int matrixidx);
|
||||
static void SetTevKSelChanged(int id);
|
||||
static void SetZTextureTypeChanged();
|
||||
|
@ -50,6 +51,7 @@ public:
|
|||
|
||||
static bool s_bFogRangeAdjustChanged;
|
||||
static bool s_bViewPortChanged;
|
||||
static bool s_bZSlopeChanged;
|
||||
|
||||
// These colors aren't available from global BP state,
|
||||
// hence we keep a copy of them around.
|
||||
|
|
|
@ -291,6 +291,7 @@ static inline void AssignVSOutputMembers(T& object, const char* a, const char* b
|
|||
#define I_FOGCOLOR "cfogcolor"
|
||||
#define I_FOGI "cfogi"
|
||||
#define I_FOGF "cfogf"
|
||||
#define I_ZSLOPE "czslope"
|
||||
|
||||
#define I_POSNORMALMATRIX "cpnmtx"
|
||||
#define I_PROJECTION "cproj"
|
||||
|
|
|
@ -690,6 +690,24 @@ void VertexShaderManager::ResetView()
|
|||
bProjectionChanged = true;
|
||||
}
|
||||
|
||||
void VertexShaderManager::TransformToClipSpace(const float* data, float *out)
|
||||
{
|
||||
const float *world_matrix = (const float *)xfmem.posMatrices + g_main_cp_state.matrix_index_a.PosNormalMtxIdx * 4;
|
||||
const float *proj_matrix = &g_fProjectionMatrix[0];
|
||||
|
||||
float t[3];
|
||||
t[0] = data[0] * world_matrix[0] + data[1] * world_matrix[1] + data[2] * world_matrix[2] + world_matrix[3];
|
||||
t[1] = data[0] * world_matrix[4] + data[1] * world_matrix[5] + data[2] * world_matrix[6] + world_matrix[7];
|
||||
t[2] = data[0] * world_matrix[8] + data[1] * world_matrix[9] + data[2] * world_matrix[10] + world_matrix[11];
|
||||
|
||||
// TODO: this requires g_fProjectionMatrix to be up to date, which is not really a good design decision.
|
||||
|
||||
out[0] = t[0] * proj_matrix[0] + t[1] * proj_matrix[1] + t[2] * proj_matrix[2] + proj_matrix[3];
|
||||
out[1] = t[0] * proj_matrix[4] + t[1] * proj_matrix[5] + t[2] * proj_matrix[6] + proj_matrix[7];
|
||||
out[2] = t[0] * proj_matrix[8] + t[1] * proj_matrix[9] + t[2] * proj_matrix[10] + proj_matrix[11];
|
||||
out[3] = t[0] * proj_matrix[12] + t[1] * proj_matrix[13] + t[2] * proj_matrix[14] + proj_matrix[15];
|
||||
}
|
||||
|
||||
void VertexShaderManager::DoState(PointerWrap &p)
|
||||
{
|
||||
p.Do(g_fProjectionMatrix);
|
||||
|
|
|
@ -34,6 +34,12 @@ public:
|
|||
static void RotateView(float x, float y);
|
||||
static void ResetView();
|
||||
|
||||
// data: 3 floats representing the X, Y and Z vertex model coordinates
|
||||
// out: 4 floats which will be initialized with the corresponding clip space coordinates
|
||||
// NOTE: g_fProjectionMatrix must be up to date when this is called
|
||||
// (i.e. VertexShaderManager::SetConstants needs to be called before using this!)
|
||||
static void TransformToClipSpace(const float* data, float *out);
|
||||
|
||||
static VertexShaderConstants constants;
|
||||
static bool dirty;
|
||||
};
|
||||
|
|
Loading…
Reference in New Issue