GS: Reuse backup environment instead of copying

This commit is contained in:
Stenzek 2023-04-03 02:13:40 +10:00 committed by refractionpcsx2
parent 4313c64d9d
commit cf772fcdd6
10 changed files with 622 additions and 635 deletions

View File

@ -142,3 +142,132 @@ GIFRegTEX0 GSDrawingContext::GetSizeFixedTEX0(const GSVector4& st, bool linear,
return res;
}
/// Appends a human-readable listing of this drawing context's registers to `filename`.
///
/// The file is opened in append/text mode so that this output lands after whatever was
/// already written to the same file. If the file cannot be opened, nothing is written
/// and the function returns silently.
///
/// @param filename Path of the text file to append to.
void GSDrawingContext::Dump(const std::string& filename)
{
	// Append on purpose so env + context are merged into a single file
	FILE* fp = fopen(filename.c_str(), "at");
	if (!fp)
		return;

	fprintf(fp,
		"XYOFFSET\n"
		"\tX:%u\n"
		"\tY:%u\n\n",
		XYOFFSET.OFX, XYOFFSET.OFY);

	fprintf(fp,
		"MIPTBP1\n"
		"\tBP1:0x%x\n"
		"\tBW1:%u\n"
		"\tBP2:0x%x\n"
		"\tBW2:%u\n"
		"\tBP3:0x%x\n"
		"\tBW3:%u\n\n",
		static_cast<uint32_t>(MIPTBP1.TBP1), static_cast<uint32_t>(MIPTBP1.TBW1), static_cast<uint32_t>(MIPTBP1.TBP2),
		static_cast<uint32_t>(MIPTBP1.TBW2), static_cast<uint32_t>(MIPTBP1.TBP3), static_cast<uint32_t>(MIPTBP1.TBW3));

	fprintf(fp,
		"MIPTBP2\n"
		"\tBP4:0x%x\n"
		"\tBW4:%u\n"
		"\tBP5:0x%x\n"
		"\tBW5:%u\n"
		"\tBP6:0x%x\n"
		"\tBW6:%u\n\n",
		static_cast<uint32_t>(MIPTBP2.TBP4), static_cast<uint32_t>(MIPTBP2.TBW4), static_cast<uint32_t>(MIPTBP2.TBP5),
		static_cast<uint32_t>(MIPTBP2.TBW5), static_cast<uint32_t>(MIPTBP2.TBP6), static_cast<uint32_t>(MIPTBP2.TBW6));

	fprintf(fp,
		"TEX0\n"
		"\tTBP0:0x%x\n"
		"\tTBW:%u\n"
		"\tPSM:0x%x\n"
		"\tTW:%u\n"
		"\tTCC:%u\n"
		"\tTFX:%u\n"
		"\tCBP:0x%x\n"
		"\tCPSM:0x%x\n"
		"\tCSM:%u\n"
		"\tCSA:%u\n"
		"\tCLD:%u\n"
		"\tTH:%u\n",
		TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD,
		static_cast<uint32_t>(TEX0.TH));

	fprintf(fp,
		"TEX1\n"
		"\tLCM:%u\n"
		"\tMXL:%u\n"
		"\tMMAG:%u\n"
		"\tMMIN:%u\n"
		"\tMTBA:%u\n"
		"\tL:%u\n"
		"\tK:%d\n\n",
		TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K);

	fprintf(fp,
		"CLAMP\n"
		"\tWMS:%u\n"
		"\tWMT:%u\n"
		"\tMINU:%u\n"
		"\tMAXU:%u\n"
		"\tMAXV:%u\n"
		"\tMINV:%u\n\n",
		CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, static_cast<uint32_t>(CLAMP.MINV));

	// TODO mipmap? (yes I'm lazy)

	fprintf(fp,
		"SCISSOR\n"
		"\tX0:%u\n"
		"\tX1:%u\n"
		"\tY0:%u\n"
		"\tY1:%u\n\n",
		SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1);

	fprintf(fp,
		"ALPHA\n"
		"\tA:%u\n"
		"\tB:%u\n"
		"\tC:%u\n"
		"\tD:%u\n"
		"\tFIX:%u\n",
		ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX);

	// Human-readable form of the blend equation. The tables carry a trailing "?" entry and
	// the selector is clamped to it, so a selector outside 0-2 can never index past the end
	// of a table (the previous 3-entry tables were indexed with the raw register value).
	static const char* const col[4] = {"Cs", "Cd", "0", "?"};
	static const char* const alpha[4] = {"As", "Ad", "Af", "?"};
	const auto sel = [](uint32_t v) -> uint32_t { return v < 3u ? v : 3u; };
	fprintf(fp, "\t=> (%s - %s) * %s + %s\n\n", col[sel(ALPHA.A)], col[sel(ALPHA.B)], alpha[sel(ALPHA.C)], col[sel(ALPHA.D)]);

	fprintf(fp,
		"TEST\n"
		"\tATE:%u\n"
		"\tATST:%u\n"
		"\tAREF:%u\n"
		"\tAFAIL:%u\n"
		"\tDATE:%u\n"
		"\tDATM:%u\n"
		"\tZTE:%u\n"
		"\tZTST:%u\n\n",
		TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST);

	fprintf(fp,
		"FBA\n"
		"\tFBA:%u\n\n",
		FBA.FBA);

	// FBP is printed multiplied by 32, as the "(*32)" label states.
	fprintf(fp,
		"FRAME\n"
		"\tFBP (*32):0x%x\n"
		"\tFBW:%u\n"
		"\tPSM:0x%x\n"
		"\tFBMSK:0x%x\n\n",
		FRAME.FBP * 32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK);

	// ZBP is printed multiplied by 32, as the "(*32)" label states.
	fprintf(fp,
		"ZBUF\n"
		"\tZBP (*32):0x%x\n"
		"\tPSM:0x%x\n"
		"\tZMSK:%u\n\n",
		ZBUF.ZBP * 32, ZBUF.PSM, ZBUF.ZMSK);

	fclose(fp);
}

View File

@ -51,22 +51,6 @@ public:
GSPixelOffset4* fzb4;
} offset;
// Backup copy of the per-context registers.
// SaveReg() copies each live register into the matching field here, and RestoreReg()
// copies them back, so the live registers can be altered for the duration of a draw.
struct
{
GIFRegXYOFFSET XYOFFSET;
GIFRegTEX0 TEX0;
GIFRegTEX1 TEX1;
GIFRegCLAMP CLAMP;
GIFRegMIPTBP1 MIPTBP1;
GIFRegMIPTBP2 MIPTBP2;
GIFRegSCISSOR SCISSOR;
GIFRegALPHA ALPHA;
GIFRegTEST TEST;
GIFRegFBA FBA;
GIFRegFRAME FRAME;
GIFRegZBUF ZBUF;
} stack;
GSDrawingContext()
{
memset(&offset, 0, sizeof(offset));
@ -118,170 +102,7 @@ public:
(int)XYOFFSET.OFY - 15);
}
// True when the depth test is enabled (TEST.ZTE) and the test method (TEST.ZTST)
// is 2 or above.
bool DepthRead() const
{
	if (!TEST.ZTE)
		return false;

	return TEST.ZTST >= 2;
}
// True when this context's state allows the Z buffer to be written.
bool DepthWrite() const
{
	// alpha test, all pixels fail, z buffer is not updated
	const bool alpha_fail_skips_z = TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY;
	if (alpha_fail_skips_z)
		return false;

	// ZTE == 0 is a bug on the real hardware, write is blocked then
	const bool z_not_masked = (ZBUF.ZMSK == 0);
	const bool z_test_enabled = (TEST.ZTE != 0);
	return z_not_masked && z_test_enabled;
}
GIFRegTEX0 GetSizeFixedTEX0(const GSVector4& st, bool linear, bool mipmap = false) const;
// Saving before and restoring after a draw allows the current registers to be corrected/optimized for that draw only.
// Note: we could avoid the restore part if all renderer code were updated to use a local copy instead.
// Copies every live register of this context into the `stack` backup.
// The copies are independent of one another; they are grouped here only for readability.
void SaveReg()
{
	auto& saved = stack;

	// Texture registers
	saved.TEX0 = TEX0;
	saved.TEX1 = TEX1;
	saved.CLAMP = CLAMP;
	saved.MIPTBP1 = MIPTBP1;
	saved.MIPTBP2 = MIPTBP2;

	// Frame / Z buffer registers
	saved.FRAME = FRAME;
	saved.ZBUF = ZBUF;
	saved.FBA = FBA;

	// Remaining per-context registers
	saved.XYOFFSET = XYOFFSET;
	saved.SCISSOR = SCISSOR;
	saved.ALPHA = ALPHA;
	saved.TEST = TEST;
}
// Copies every register held in the `stack` backup back into the live registers,
// undoing any changes made since the matching SaveReg() call.
void RestoreReg()
{
	const auto& saved = stack;

	// Texture registers
	TEX0 = saved.TEX0;
	TEX1 = saved.TEX1;
	CLAMP = saved.CLAMP;
	MIPTBP1 = saved.MIPTBP1;
	MIPTBP2 = saved.MIPTBP2;

	// Frame / Z buffer registers
	FRAME = saved.FRAME;
	ZBUF = saved.ZBUF;
	FBA = saved.FBA;

	// Remaining per-context registers
	XYOFFSET = saved.XYOFFSET;
	SCISSOR = saved.SCISSOR;
	ALPHA = saved.ALPHA;
	TEST = saved.TEST;
}
/// Appends a human-readable listing of this drawing context's registers to `filename`.
///
/// The file is opened in append/text mode so that this output lands after whatever was
/// already written to the same file. If the file cannot be opened, nothing is written
/// and the function returns silently.
///
/// @param filename Path of the text file to append to.
void Dump(const std::string& filename)
{
	// Append on purpose so env + context are merged into a single file
	FILE* fp = fopen(filename.c_str(), "at");
	if (!fp)
		return;

	fprintf(fp, "XYOFFSET\n"
	            "\tX:%u\n"
	            "\tY:%u\n\n"
	        , XYOFFSET.OFX, XYOFFSET.OFY);

	fprintf(fp, "MIPTBP1\n"
	            "\tBP1:0x%x\n"
	            "\tBW1:%u\n"
	            "\tBP2:0x%x\n"
	            "\tBW2:%u\n"
	            "\tBP3:0x%x\n"
	            "\tBW3:%u\n\n"
	        , static_cast<uint32_t>(MIPTBP1.TBP1), static_cast<uint32_t>(MIPTBP1.TBW1), static_cast<uint32_t>(MIPTBP1.TBP2)
	        , static_cast<uint32_t>(MIPTBP1.TBW2), static_cast<uint32_t>(MIPTBP1.TBP3), static_cast<uint32_t>(MIPTBP1.TBW3));

	fprintf(fp, "MIPTBP2\n"
	            "\tBP4:0x%x\n"
	            "\tBW4:%u\n"
	            "\tBP5:0x%x\n"
	            "\tBW5:%u\n"
	            "\tBP6:0x%x\n"
	            "\tBW6:%u\n\n"
	        , static_cast<uint32_t>(MIPTBP2.TBP4), static_cast<uint32_t>(MIPTBP2.TBW4), static_cast<uint32_t>(MIPTBP2.TBP5)
	        , static_cast<uint32_t>(MIPTBP2.TBW5), static_cast<uint32_t>(MIPTBP2.TBP6), static_cast<uint32_t>(MIPTBP2.TBW6));

	fprintf(fp, "TEX0\n"
	            "\tTBP0:0x%x\n"
	            "\tTBW:%u\n"
	            "\tPSM:0x%x\n"
	            "\tTW:%u\n"
	            "\tTCC:%u\n"
	            "\tTFX:%u\n"
	            "\tCBP:0x%x\n"
	            "\tCPSM:0x%x\n"
	            "\tCSM:%u\n"
	            "\tCSA:%u\n"
	            "\tCLD:%u\n"
	            "\tTH:%u\n"
	        , TEX0.TBP0, TEX0.TBW, TEX0.PSM, TEX0.TW, TEX0.TCC, TEX0.TFX, TEX0.CBP, TEX0.CPSM, TEX0.CSM, TEX0.CSA, TEX0.CLD, static_cast<uint32_t>(TEX0.TH));

	fprintf(fp, "TEX1\n"
	            "\tLCM:%u\n"
	            "\tMXL:%u\n"
	            "\tMMAG:%u\n"
	            "\tMMIN:%u\n"
	            "\tMTBA:%u\n"
	            "\tL:%u\n"
	            "\tK:%d\n\n"
	        , TEX1.LCM, TEX1.MXL, TEX1.MMAG, TEX1.MMIN, TEX1.MTBA, TEX1.L, TEX1.K);

	fprintf(fp, "CLAMP\n"
	            "\tWMS:%u\n"
	            "\tWMT:%u\n"
	            "\tMINU:%u\n"
	            "\tMAXU:%u\n"
	            "\tMAXV:%u\n"
	            "\tMINV:%u\n\n"
	        , CLAMP.WMS, CLAMP.WMT, CLAMP.MINU, CLAMP.MAXU, CLAMP.MAXV, static_cast<uint32_t>(CLAMP.MINV));

	// TODO mipmap? (yes I'm lazy)

	fprintf(fp, "SCISSOR\n"
	            "\tX0:%u\n"
	            "\tX1:%u\n"
	            "\tY0:%u\n"
	            "\tY1:%u\n\n"
	        , SCISSOR.SCAX0, SCISSOR.SCAX1, SCISSOR.SCAY0, SCISSOR.SCAY1);

	fprintf(fp, "ALPHA\n"
	            "\tA:%u\n"
	            "\tB:%u\n"
	            "\tC:%u\n"
	            "\tD:%u\n"
	            "\tFIX:%u\n"
	        , ALPHA.A, ALPHA.B, ALPHA.C, ALPHA.D, ALPHA.FIX);

	// Human-readable form of the blend equation. The tables carry a trailing "?" entry and
	// the selector is clamped to it, so a selector outside 0-2 can never index past the end
	// of a table (the previous 3-entry tables were indexed with the raw register value).
	static const char* const col[4] = {"Cs", "Cd", "0", "?"};
	static const char* const alpha[4] = {"As", "Ad", "Af", "?"};
	const auto sel = [](uint32_t v) -> uint32_t { return v < 3u ? v : 3u; };
	fprintf(fp, "\t=> (%s - %s) * %s + %s\n\n", col[sel(ALPHA.A)], col[sel(ALPHA.B)], alpha[sel(ALPHA.C)], col[sel(ALPHA.D)]);

	fprintf(fp, "TEST\n"
	            "\tATE:%u\n"
	            "\tATST:%u\n"
	            "\tAREF:%u\n"
	            "\tAFAIL:%u\n"
	            "\tDATE:%u\n"
	            "\tDATM:%u\n"
	            "\tZTE:%u\n"
	            "\tZTST:%u\n\n"
	        , TEST.ATE, TEST.ATST, TEST.AREF, TEST.AFAIL, TEST.DATE, TEST.DATM, TEST.ZTE, TEST.ZTST);

	fprintf(fp, "FBA\n"
	            "\tFBA:%u\n\n"
	        , FBA.FBA);

	// FBP is printed multiplied by 32, as the "(*32)" label states.
	fprintf(fp, "FRAME\n"
	            "\tFBP (*32):0x%x\n"
	            "\tFBW:%u\n"
	            "\tPSM:0x%x\n"
	            "\tFBMSK:0x%x\n\n"
	        , FRAME.FBP * 32, FRAME.FBW, FRAME.PSM, FRAME.FBMSK);

	// ZBP is printed multiplied by 32, as the "(*32)" label states.
	fprintf(fp, "ZBUF\n"
	            "\tZBP (*32):0x%x\n"
	            "\tPSM:0x%x\n"
	            "\tZMSK:%u\n\n"
	        , ZBUF.ZBP * 32, ZBUF.PSM, ZBUF.ZMSK);

	fclose(fp);
}
void Dump(const std::string& filename);
};

View File

@ -59,25 +59,9 @@ public:
CTXT[0].Reset();
CTXT[1].Reset();
memset(dimx, 0, sizeof(dimx));
}
GSVector4i dimx[8];
// Rebuilds the dimx[] vectors from the DIMX register.
// Each of the four DMx0..DMx3 rows fills one pair of slots: the odd slot receives the
// row's four values interleaved with zeros, and the even slot receives the xxzzlh()
// shuffle of that odd slot.
void UpdateDIMX()
{
	// Every statement ends with ';' — three of these previously ended with ',' and were
	// silently fused with the following line through the comma operator.
	dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0);
	dimx[0] = dimx[1].xxzzlh();
	dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0);
	dimx[2] = dimx[3].xxzzlh();
	dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0);
	dimx[4] = dimx[5].xxzzlh();
	dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0);
	dimx[6] = dimx[7].xxzzlh();
}
void Dump(const std::string& filename)
void Dump(const std::string& filename) const
{
FILE* fp = fopen(filename.c_str(), "wt");
if (!fp)

View File

@ -154,7 +154,7 @@ void GSState::Reset(bool hardware_reset)
UpdateVertexKick();
m_env.UpdateDIMX();
UpdateDIMX();
for (u32 i = 0; i < 2; i++)
{
@ -1208,7 +1208,7 @@ void GSState::GIFRegHandlerDIMX(const GIFReg* RESTRICT r)
m_env.DIMX = r->DIMX;
if (update)
m_env.UpdateDIMX();
UpdateDIMX();
if (m_prev_env.DIMX != m_env.DIMX)
m_dirty_gs_regs |= (1 << DIRTY_REG_DIMX);
@ -1438,12 +1438,6 @@ void GSState::GIFRegHandlerHWREG(const GIFReg* RESTRICT r)
Write(reinterpret_cast<const u8*>(r), 8); // haunting ground
}
// Copies the leading bytes of a drawing environment, plus the leading bytes of one of
// its drawing contexts (selected by `ctx`), from `src` to `dest`.
inline void GSState::CopyEnv(GSDrawingEnvironment* dest, GSDrawingEnvironment* src, int ctx)
{
	// NOTE(review): both byte counts are hard-coded; presumably they span the register
	// fields at the start of each structure — confirm against the struct layouts before
	// adding or reordering members.
	constexpr size_t env_head_bytes = 88;
	constexpr size_t ctx_head_bytes = 96;

	auto* const dest_ctx = &dest->CTXT[ctx];
	const auto* const src_ctx = &src->CTXT[ctx];

	memcpy(dest, src, env_head_bytes);
	memcpy(dest_ctx, src_ctx, ctx_head_bytes);
}
void GSState::Flush(GSFlushReason reason)
{
FlushWrite();
@ -1455,35 +1449,29 @@ void GSState::Flush(GSFlushReason reason)
if (m_dirty_gs_regs)
{
const int ctx = m_prev_env.PRIM.CTXT;
bool restore_offsets = false;
CopyEnv(&m_backup_env, &m_env, ctx);
CopyEnv(&m_env, &m_prev_env, ctx);
m_env.UpdateDIMX();
m_env.CTXT[ctx].UpdateScissor();
m_draw_env = &m_prev_env;
PRIM = &m_prev_env.PRIM;
UpdateContext();
if (((m_backup_env.CTXT[ctx].ZBUF.U32[0] ^ m_env.CTXT[ctx].ZBUF.U32[0]) & 0x3f0001ff) || ((m_backup_env.CTXT[ctx].FRAME.U32[0] ^ m_env.CTXT[ctx].FRAME.U32[0]) & 0x3f3f01ff))
{
memcpy(&m_backup_env.CTXT[ctx].offset, &m_env.CTXT[ctx].offset, sizeof(m_env.CTXT[ctx].offset));
memcpy(&m_env.CTXT[ctx].offset, &m_prev_env.CTXT[ctx].offset, sizeof(m_env.CTXT[ctx].offset));
restore_offsets = true;
}
if (m_dirty_gs_regs & (1 << DIRTY_REG_DIMX))
UpdateDIMX();
FlushPrim();
if (restore_offsets)
memcpy(&m_env.CTXT[ctx].offset, &m_backup_env.CTXT[ctx].offset, sizeof(m_env.CTXT[ctx].offset));
CopyEnv(&m_env, &m_backup_env, ctx);
m_env.CTXT[ctx].UpdateScissor();
m_env.UpdateDIMX();
m_draw_env = &m_env;
PRIM = &m_env.PRIM;
UpdateContext();
if (m_dirty_gs_regs & (1 << DIRTY_REG_DIMX))
UpdateDIMX();
m_backed_up_ctx = -1;
}
else
{
FlushPrim();
}
m_dirty_gs_regs = 0;
}
@ -1641,15 +1629,15 @@ void GSState::FlushPrim()
// we can ignore the Z format, since it won't be used in the draw (Star Ocean 3 transitions)
const bool ignoreZ = m_context->ZBUF.ZMSK && m_context->TEST.ZTST == 1;
#ifdef PCSX2_DEVBUILD
if (GSLocalMemory::m_psm[m_context->FRAME.PSM].fmt >= 3 || (GSLocalMemory::m_psm[m_context->ZBUF.PSM].fmt >= 3 && !ignoreZ))
{
Console.Warning("GS: Possible invalid draw, Frame PSM %x ZPSM %x", m_context->FRAME.PSM, m_context->ZBUF.PSM);
}
#endif
m_vt.Update(m_vertex.buff, m_index.buff, m_vertex.tail, m_index.tail, GSUtil::GetPrimClass(PRIM->PRIM));
m_context->SaveReg();
try
{
Draw();
@ -1665,8 +1653,6 @@ void GSState::FlushPrim()
Console.Error("GS: Memory allocation failure.");
}
m_context->RestoreReg();
g_perfmon.Put(GSPerfMon::Draw, 1);
g_perfmon.Put(GSPerfMon::Prim, m_index.tail / GSUtil::GetVertexCount(PRIM->PRIM));
@ -2540,7 +2526,7 @@ int GSState::Defrost(const freezeData* fd)
UpdateVertexKick();
m_env.UpdateDIMX();
UpdateDIMX();
for (u32 i = 0; i < 2; i++)
{
@ -2575,12 +2561,13 @@ void GSState::UpdateCRCHacks()
void GSState::UpdateContext()
{
const bool ctx_switch = (m_context != &m_env.CTXT[PRIM->CTXT]);
const bool ctx_switch = (m_context != &m_draw_env->CTXT[PRIM->CTXT]);
if (ctx_switch)
GL_REG("Context Switch %d", PRIM->CTXT);
m_context = &m_env.CTXT[PRIM->CTXT];
// TODO: Don't mutate context (looking at you, HW)
m_context = const_cast<GSDrawingContext*>(&m_draw_env->CTXT[PRIM->CTXT]);
UpdateScissor();
}
@ -3173,8 +3160,11 @@ __forceinline void GSState::VertexKick(u32 skip)
if (m_index.tail == 0 && ((m_backed_up_ctx != m_env.PRIM.CTXT) || m_dirty_gs_regs))
{
CopyEnv(&m_prev_env, &m_env, m_env.PRIM.CTXT);
memcpy(&m_prev_env.CTXT[m_prev_env.PRIM.CTXT].offset, &m_env.CTXT[m_prev_env.PRIM.CTXT].offset, sizeof(m_env.CTXT[m_prev_env.PRIM.CTXT].offset));
const int ctx = m_env.PRIM.CTXT;
std::memcpy(&m_prev_env, &m_env, 88);
std::memcpy(&m_prev_env.CTXT[ctx], &m_env.CTXT[ctx], 96);
std::memcpy(&m_prev_env.CTXT[ctx].offset, &m_env.CTXT[ctx].offset, sizeof(m_env.CTXT[ctx].offset));
std::memcpy(&m_prev_env.CTXT[ctx].scissor, &m_env.CTXT[ctx].scissor, sizeof(m_env.CTXT[ctx].scissor));
m_dirty_gs_regs = 0;
m_backed_up_ctx = m_env.PRIM.CTXT;
}
@ -3583,7 +3573,7 @@ void GSState::CalcAlphaMinMax()
GSVector4i a = m_vt.m_min.c.uph32(m_vt.m_max.c).zzww();
if (PRIM->TME && context->TEX0.TCC)
{
const GSDrawingEnvironment& env = m_env;
const GSDrawingEnvironment& env = *m_draw_env;
switch (GSLocalMemory::m_psm[context->TEX0.PSM].fmt)
{
@ -3824,6 +3814,19 @@ bool GSState::IsCoverageAlpha()
return !PRIM->ABE && PRIM->AA1 && (m_vt.m_primclass == GS_LINE_CLASS || m_vt.m_primclass == GS_TRIANGLE_CLASS);
}
void GSState::UpdateDIMX()
{
const GIFRegDIMX& DIMX = m_draw_env->DIMX;
dimx[1] = GSVector4i(DIMX.DM00, 0, DIMX.DM01, 0, DIMX.DM02, 0, DIMX.DM03, 0);
dimx[0] = dimx[1].xxzzlh();
dimx[3] = GSVector4i(DIMX.DM10, 0, DIMX.DM11, 0, DIMX.DM12, 0, DIMX.DM13, 0);
dimx[2] = dimx[3].xxzzlh();
dimx[5] = GSVector4i(DIMX.DM20, 0, DIMX.DM21, 0, DIMX.DM22, 0, DIMX.DM23, 0);
dimx[4] = dimx[5].xxzzlh();
dimx[7] = GSVector4i(DIMX.DM30, 0, DIMX.DM31, 0, DIMX.DM32, 0, DIMX.DM33, 0);
dimx[6] = dimx[7].xxzzlh();
}
GIFRegTEX0 GSState::GetTex0Layer(u32 lod)
{
// Shortcut
@ -3876,33 +3879,6 @@ GIFRegTEX0 GSState::GetTex0Layer(u32 lod)
return TEX0;
}
// Returns true when `tbp` equals the block address of the current context's frame buffer
// or Z buffer AND the matching buffer is considered in use by the current draw.
bool GSState::IsTBPFrameOrZ(u32 tbp) const
{
	GSDrawingContext* const ctx = m_context;

	const bool hits_frame = (ctx->FRAME.Block() == tbp);
	const bool hits_z = (ctx->ZBUF.Block() == tbp);
	if (!(hits_frame || hits_z))
		return false;

	const u32 frame_mask = ctx->FRAME.FBMSK;
	const u32 format_mask = GSLocalMemory::m_psm[ctx->FRAME.PSM].fmsk;
	// All ones when Z writes are masked or the Z test is disabled, zero otherwise.
	const u32 z_mask = (ctx->ZBUF.ZMSK || ctx->TEST.ZTE == 0) ? 0xffffffff : 0;
	const u32 max_z = (0xFFFFFFFF >> (GSLocalMemory::m_psm[ctx->ZBUF.PSM].fmt * 8));

	// The render target is not needed when blending resolves to Cd with frame PSM 1,
	// or when (without DATE) every bit the frame format can hold is masked off.
	const bool no_rt =
		(ctx->ALPHA.IsCd() && PRIM->ABE && (ctx->FRAME.PSM == 1)) ||
		(!ctx->TEST.DATE && (frame_mask & format_mask) == format_mask);

	const bool z_writes_blocked = (z_mask != 0);
	const bool no_ds =
		(z_writes_blocked &&
			(
				// Depth is always pass/fail (no read) and writes are discarded.
				ctx->TEST.ZTST <= ZTST_ALWAYS ||
				// Depth test will always pass
				(ctx->TEST.ZTST == ZTST_GEQUAL && m_vt.m_eq.z && std::min(m_vertex.buff[0].XYZ.Z, max_z) == max_z))) ||
		// Depth will be written through the RT
		(!no_rt && ctx->FRAME.FBP == ctx->ZBUF.ZBP && !PRIM->TME && !z_writes_blocked && (frame_mask & format_mask) == 0 && ctx->TEST.ZTE);

	// Relying a lot on the optimizer here... I don't like it.
	return (hits_frame && !no_rt) || (hits_z && !no_ds);
}
// GSTransferBuffer
GSState::GSTransferBuffer::GSTransferBuffer()

View File

@ -205,6 +205,7 @@ protected:
bool IsMipMapDraw();
bool IsMipMapActive();
bool IsCoverageAlpha();
void UpdateDIMX();
public:
struct GSUploadQueue
@ -215,14 +216,15 @@ public:
};
GIFPath m_path[4] = {};
GIFRegPRIM* PRIM = nullptr;
const GIFRegPRIM* PRIM = nullptr;
GSPrivRegSet* m_regs = nullptr;
GSLocalMemory m_mem;
GSDrawingEnvironment m_env = {};
GSDrawingEnvironment m_backup_env = {};
GSDrawingEnvironment m_prev_env = {};
GSVector4i temp_draw_rect = {};
const GSDrawingEnvironment* m_draw_env = &m_env;
GSDrawingContext* m_context = nullptr;
GSVector4i temp_draw_rect = {};
GSVector4i dimx[8] = {};
u32 m_crc = 0;
CRC::Game m_game = {};
std::unique_ptr<GSDumpBase> m_dump;
@ -881,7 +883,6 @@ public:
virtual void Reset(bool hardware_reset);
virtual void UpdateSettings(const Pcsx2Config::GSOptions& old_config);
void CopyEnv(GSDrawingEnvironment* dest, GSDrawingEnvironment* src, int ctx);
void Flush(GSFlushReason reason);
void FlushPrim();
bool TestDrawChanged();
@ -918,7 +919,4 @@ public:
PRIM_OVERLAP PrimitiveOverlap();
GIFRegTEX0 GetTex0Layer(u32 lod);
/// Returns true if the specified texture address matches the frame or Z buffer.
bool IsTBPFrameOrZ(u32 tbp) const;
};

View File

@ -28,6 +28,12 @@ static CRCHackLevel s_crc_hack_level = CRCHackLevel::Full;
#define RPRIM r.PRIM
#define RCONTEXT r.m_context
#define RTEX0 r.m_cached_ctx.TEX0
#define RTEST r.m_cached_ctx.TEST
#define RFRAME r.m_cached_ctx.FRAME
#define RZBUF r.m_cached_ctx.ZBUF
#define RCLAMP r.m_cached_ctx.CLAMP
////////////////////////////////////////////////////////////////////////////////
// Partial level, broken on all renderers.
////////////////////////////////////////////////////////////////////////////////
@ -266,9 +272,9 @@ bool GSHwHack::GSC_BlackAndBurnoutSky(GSRendererHW& r, const GSFrameInfo& fi, in
if (skip != 0)
return true;
const GIFRegTEX0& TEX0 = RCONTEXT->TEX0;
const GIFRegTEX0& TEX0 = RTEX0;
const GIFRegFRAME& FRAME = RFRAME;
const GIFRegALPHA& ALPHA = RCONTEXT->ALPHA;
const GIFRegFRAME& FRAME = RCONTEXT->FRAME;
if (RPRIM->PRIM == GS_SPRITE && !RPRIM->IIP && RPRIM->TME && !RPRIM->FGE && RPRIM->ABE && !RPRIM->AA1 && !RPRIM->FST && !RPRIM->FIX &&
ALPHA.A == ALPHA.B && ALPHA.D == 0 && FRAME.PSM == PSM_PSMCT32 && TEX0.CPSM == PSM_PSMCT32 && TEX0.TCC && !TEX0.TFX && !TEX0.CSM)
@ -530,11 +536,11 @@ bool GSHwHack::GSC_UrbanReign(GSRendererHW& r, const GSFrameInfo& fi, int& skip)
// from matching the last column, because it's trying to fit the last 65 columns of a 640x448 (effectively 641x448)
// texture into a 640x448 render target.
if (fi.TME && fi.TBP0 != fi.FBP && fi.FPSM == PSM_PSMCT32 && fi.TPSM == PSM_PSMCT32 &&
RCONTEXT->FRAME.FBW == (RCONTEXT->TEX0.TBW / 2) && RCONTEXT->CLAMP.WMS == CLAMP_REGION_CLAMP &&
RCONTEXT->CLAMP.WMT == CLAMP_REGION_CLAMP && ((r.m_vt.m_max.t == GSVector4(64.0f, 448.0f)).mask() == 0x3))
RFRAME.FBW == (RTEX0.TBW / 2) && RCLAMP.WMS == CLAMP_REGION_CLAMP &&
RCLAMP.WMT == CLAMP_REGION_CLAMP && ((r.m_vt.m_max.t == GSVector4(64.0f, 448.0f)).mask() == 0x3))
{
GL_CACHE("GSC_UrbanReign: Fix region clamp to 64 wide");
r.m_context->CLAMP.MAXU = 63;
RCLAMP.MAXU = 63;
}
}
@ -769,18 +775,18 @@ bool GSHwHack::OI_PointListPalette(GSRendererHW& r, GSTexture* rt, GSTexture* ds
&& !r.PRIM->FGE // No FOG.
&& !r.PRIM->AA1 // No antialiasing.
&& !r.PRIM->FIX // Normal fragment value control.
&& !r.m_env.DTHE.DTHE // No dithering.
&& !r.m_context->TEST.ATE // No alpha test.
&& !r.m_context->TEST.DATE // No destination alpha test.
&& (!r.m_context->DepthRead() && !r.m_context->DepthWrite()) // No depth handling.
&& !r.m_context->TEX0.CSM // No CLUT usage.
&& !r.m_env.PABE.PABE // No PABE.
&& !r.m_draw_env->DTHE.DTHE // No dithering.
&& !r.m_cached_ctx.TEST.ATE // No alpha test.
&& !r.m_cached_ctx.TEST.DATE // No destination alpha test.
&& (!r.m_cached_ctx.DepthRead() && !r.m_cached_ctx.DepthWrite()) // No depth handling.
&& !RTEX0.CSM // No CLUT usage.
&& !r.m_draw_env->PABE.PABE // No PABE.
&& r.m_context->FBA.FBA == 0 // No Alpha Correction.
&& r.m_context->FRAME.FBMSK == 0 // No frame buffer masking.
&& r.m_cached_ctx.FRAME.FBMSK == 0 // No frame buffer masking.
)
{
const u32 FBP = r.m_context->FRAME.Block();
const u32 FBW = r.m_context->FRAME.FBW;
const u32 FBP = r.m_cached_ctx.FRAME.Block();
const u32 FBW = r.m_cached_ctx.FRAME.FBW;
GL_INS("PointListPalette - m_r = <%d, %d => %d, %d>, n_vertices = %zu, FBP = 0x%x, FBW = %u", r.m_r.x, r.m_r.y, r.m_r.z, r.m_r.w, n_vertices, FBP, FBW);
const GSVertex* RESTRICT v = r.m_vertex.buff;
const int ox(r.m_context->XYOFFSET.OFX);
@ -813,11 +819,11 @@ bool GSHwHack::OI_BigMuthaTruckers(GSRendererHW& r, GSTexture* rt, GSTexture* ds
// vertical resolution is half so only half is processed at once
// We, however, don't have this limitation so we'll replace the draw with a full-screen TS.
const GIFRegTEX0& Texture = RCONTEXT->TEX0;
const GIFRegTEX0& Texture = RTEX0;
GIFRegTEX0 Frame = {};
Frame.TBW = RCONTEXT->FRAME.FBW;
Frame.TBP0 = RCONTEXT->FRAME.Block();
Frame.TBW = RFRAME.FBW;
Frame.TBP0 = RFRAME.Block();
if (RPRIM->TME && Frame.TBW == 10 && Texture.TBW == 10 && Frame.TBP0 == 0x00a00 && Texture.PSM == PSM_PSMT8H && (r.m_r.y == 256 || r.m_r.y == 224))
{
@ -937,11 +943,11 @@ bool GSHwHack::OI_FFXII(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTexture
bool GSHwHack::OI_FFX(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
const u32 FBP = RCONTEXT->FRAME.Block();
const u32 ZBP = RCONTEXT->ZBUF.Block();
const u32 TBP = RCONTEXT->TEX0.TBP0;
const u32 FBP = RFRAME.Block();
const u32 ZBP = RZBUF.Block();
const u32 TBP = RTEX0.TBP0;
if ((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && RPRIM->TME && TBP == 0x01a00 && RCONTEXT->TEX0.PSM == PSM_PSMCT16S)
if ((FBP == 0x00d00 || FBP == 0x00000) && ZBP == 0x02100 && RPRIM->TME && TBP == 0x01a00 && RTEX0.PSM == PSM_PSMCT16S)
{
// random battle transition (z buffer written directly, clear it now)
GL_INS("OI_FFX ZB clear");
@ -956,8 +962,8 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu
{
if (!RPRIM->TME)
{
const u32 FBP = RCONTEXT->FRAME.Block();
const u32 ZBP = RCONTEXT->ZBUF.Block();
const u32 FBP = RFRAME.Block();
const u32 ZBP = RZBUF.Block();
if (FBP == 0x008c0 && ZBP == 0x01a40)
{
@ -966,8 +972,8 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu
GIFRegTEX0 TEX0 = {};
TEX0.TBP0 = ZBP;
TEX0.TBW = RCONTEXT->FRAME.FBW;
TEX0.PSM = RCONTEXT->FRAME.PSM;
TEX0.TBW = RFRAME.FBW;
TEX0.PSM = RFRAME.PSM;
if (GSTextureCache::Target* tmp_rt = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::RenderTarget))
{
@ -977,15 +983,15 @@ bool GSHwHack::OI_RozenMaidenGebetGarden(GSRendererHW& r, GSTexture* rt, GSTextu
return false;
}
else if (FBP == 0x00000 && RCONTEXT->ZBUF.Block() == 0x01180)
else if (FBP == 0x00000 && RZBUF.Block() == 0x01180)
{
// z buffer clear, frame buffer now points to the z buffer (how can they be so clever?)
GIFRegTEX0 TEX0 = {};
TEX0.TBP0 = FBP;
TEX0.TBW = RCONTEXT->FRAME.FBW;
TEX0.PSM = RCONTEXT->ZBUF.PSM;
TEX0.TBW = RFRAME.FBW;
TEX0.PSM = RZBUF.PSM;
if (GSTextureCache::Target* tmp_ds = g_texture_cache->LookupTarget(TEX0, r.GetTargetSize(), r.GetTextureScaleFactor(), GSTextureCache::DepthStencil))
{
@ -1008,12 +1014,12 @@ bool GSHwHack::OI_SonicUnleashed(GSRendererHW& r, GSTexture* rt, GSTexture* ds,
// save result in alpha with a TS,
// Restore RG channel that we previously copied to render shadows.
const GIFRegTEX0& Texture = RCONTEXT->TEX0;
const GIFRegTEX0& Texture = RTEX0;
GIFRegTEX0 Frame = {};
Frame.TBW = RCONTEXT->FRAME.FBW;
Frame.TBP0 = RCONTEXT->FRAME.Block();
Frame.PSM = RCONTEXT->FRAME.PSM;
Frame.TBW = RFRAME.FBW;
Frame.TBP0 = RFRAME.Block();
Frame.PSM = RFRAME.PSM;
if ((!RPRIM->TME) || (GSLocalMemory::m_psm[Texture.PSM].bpp != 16) || (GSLocalMemory::m_psm[Frame.PSM].bpp != 16) || (Texture.TBP0 == Frame.TBP0) || (Frame.TBW != 16 && Texture.TBW != 16))
return true;
@ -1073,7 +1079,7 @@ bool GSHwHack::OI_ArTonelico2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GST
const GSVertex* v = &r.m_vertex.buff[0];
if (r.m_vertex.next == 2 && !RPRIM->TME && RCONTEXT->FRAME.FBW == 10 && v->XYZ.Z == 0 && RCONTEXT->TEST.ZTST == ZTST_ALWAYS)
if (r.m_vertex.next == 2 && !RPRIM->TME && RFRAME.FBW == 10 && v->XYZ.Z == 0 && RTEST.ZTST == ZTST_ALWAYS)
{
GL_INS("OI_ArTonelico2");
g_gs_device->ClearDepth(ds);
@ -1123,10 +1129,10 @@ bool GSHwHack::GSC_Battlefield2(GSRendererHW& r, const GSFrameInfo& fi, int& ski
bool GSHwHack::OI_Battlefield2(GSRendererHW& r, GSTexture* rt, GSTexture* ds, GSTextureCache::Source* t)
{
if (!RPRIM->TME || RCONTEXT->FRAME.Block() > 0xD00 || RCONTEXT->TEX0.TBP0 > 0x1D00)
if (!RPRIM->TME || RFRAME.Block() > 0xD00 || RTEX0.TBP0 > 0x1D00)
return true;
if (rt && t && RCONTEXT->FRAME.Block() == 0 && RCONTEXT->TEX0.TBP0 == 0x1000)
if (rt && t && RFRAME.Block() == 0 && RTEX0.TBP0 == 0x1000)
{
const GSVector4i rc(0, 0, std::min(rt->GetWidth(), t->m_texture->GetWidth()), std::min(rt->GetHeight(), t->m_texture->GetHeight()));
g_gs_device->CopyRect(t->m_texture, rt, rc, 0, 0);
@ -1143,16 +1149,21 @@ bool GSHwHack::OI_HauntingGround(GSRendererHW& r, GSTexture* rt, GSTexture* ds,
if (rt && !ds && !t && r.IsConstantDirectWriteMemClear(true))
{
GL_CACHE("GSHwHack::OI_HauntingGround()");
g_texture_cache->InvalidateVideoMemTargets(GSTextureCache::RenderTarget, RCONTEXT->FRAME.Block(),
RCONTEXT->FRAME.FBW, RCONTEXT->FRAME.PSM, r.m_r);
g_texture_cache->InvalidateVideoMemTargets(GSTextureCache::RenderTarget, RFRAME.Block(), RFRAME.FBW, RFRAME.PSM, r.m_r);
}
// Not skipping anything. This is just an invalidation hack.
return true;
}
#undef RCONTEXT
#undef RPRIM
#undef RCONTEXT
#undef RTEX0
#undef RTEST
#undef RFRAME
#undef RZBUF
#undef RCLAMP
#undef CRC_Partial
#undef CRC_Full
@ -1289,6 +1300,7 @@ bool GSRendererHW::IsBadFrame()
{
if (m_gsc)
{
// GSC occurs before cached regs are set up
const GSFrameInfo fi = {
m_context->FRAME.Block(),
m_context->FRAME.PSM,

File diff suppressed because it is too large Load Diff

View File

@ -109,6 +109,30 @@ private:
GSVector4i GetSplitTextureShuffleDrawRect() const;
GSVector4i m_r = {};
// We modify some of the context registers to optimize away unnecessary operations.
// Instead of messing with the real context, we copy them and use those instead.
struct
{
	GIFRegTEX0 TEX0;
	GIFRegCLAMP CLAMP;
	GIFRegTEST TEST;
	GIFRegFRAME FRAME;
	GIFRegZBUF ZBUF;

	// True when the depth test is enabled (TEST.ZTE) and the test method (TEST.ZTST)
	// is 2 or above.
	__ri bool DepthRead() const
	{
		if (!TEST.ZTE)
			return false;

		return TEST.ZTST >= 2;
	}

	// True when the cached register state allows the Z buffer to be written.
	__ri bool DepthWrite() const
	{
		// alpha test, all pixels fail, z buffer is not updated
		const bool alpha_fail_skips_z = TEST.ATE && TEST.ATST == ATST_NEVER && TEST.AFAIL != AFAIL_ZB_ONLY;
		if (alpha_fail_skips_z)
			return false;

		// ZTE == 0 is a bug on the real hardware, write is blocked then
		const bool z_not_masked = (ZBUF.ZMSK == 0);
		const bool z_test_enabled = (TEST.ZTE != 0);
		return z_not_masked && z_test_enabled;
	}
} m_cached_ctx;
// CRC Hacks
bool IsBadFrame();
@ -182,4 +206,7 @@ public:
/// Called by the texture cache to know for certain whether there is a channel shuffle.
bool TestChannelShuffle(GSTextureCache::Target* src);
/// Returns true if the specified texture address matches the frame or Z buffer.
bool IsTBPFrameOrZ(u32 tbp) const;
};

View File

@ -41,7 +41,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
GSVertexTrace& vt = hw.m_vt;
const GIFRegPRIM* PRIM = hw.PRIM;
const GSDrawingContext* context = hw.m_context;
const GSDrawingEnvironment& env = hw.m_env;
const GSDrawingEnvironment& env = *hw.m_draw_env;
const GS_PRIM_CLASS primclass = vt.m_primclass;
GSRasterizerData data;
@ -58,7 +58,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
data.vertex_count = hw.m_vertex.next;
data.index = hw.m_index.buff;
data.index_count = hw.m_index.tail;
data.scanmsk_value = hw.m_env.SCANMSK.MSK;
data.scanmsk_value = env.SCANMSK.MSK;
// Skip per pixel division if q is constant.
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !vt.m_eq.q.
@ -455,7 +455,7 @@ bool GSRendererHWFunctions::SwPrimRender(GSRendererHW& hw, bool invalidate_tc)
gd.dimx = dimx_storage;
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
memcpy(gd.dimx, hw.dimx, sizeof(hw.dimx));
}
}

View File

@ -327,7 +327,7 @@ void GSRendererSW::Draw()
// Dump Register state
s = GetDrawDumpPath("%05d_context.txt", s_n);
m_env.Dump(s);
m_draw_env->Dump(s);
m_context->Dump(s);
// Dump vertices
@ -345,7 +345,7 @@ void GSRendererSW::Draw()
sd->vertex_count = m_vertex.next;
sd->index = (u32*)(sd->buff + sizeof(GSVertexSW) * ((m_vertex.next + 1) & ~1));
sd->index_count = m_index.tail;
sd->scanmsk_value = m_env.SCANMSK.MSK;
sd->scanmsk_value = m_draw_env->SCANMSK.MSK;
// skip per pixel division if q is constant.
// Optimize the division by 1 with a nop. It also means that GS_SPRITE_CLASS must be processed when !m_vt.m_eq.q.
@ -939,7 +939,7 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{
GSScanlineGlobalData& gd = data->global;
const GSDrawingEnvironment& env = m_env;
const GSDrawingEnvironment& env = *m_draw_env;
const GSDrawingContext* context = m_context;
const GS_PRIM_CLASS primclass = m_vt.m_primclass;
@ -1327,9 +1327,9 @@ bool GSRendererSW::GetScanlineGlobalData(SharedData* data)
{
gd.sel.dthe = 1;
gd.dimx = (GSVector4i*)m_vertex_heap.alloc(sizeof(env.dimx), 32);
gd.dimx = (GSVector4i*)m_vertex_heap.alloc(sizeof(dimx), 32);
memcpy(gd.dimx, env.dimx, sizeof(env.dimx));
memcpy(gd.dimx, dimx, sizeof(dimx));
}
}